diff --git a/AppImage/components/health-status-modal.tsx b/AppImage/components/health-status-modal.tsx index b6da6742..439b946f 100644 --- a/AppImage/components/health-status-modal.tsx +++ b/AppImage/components/health-status-modal.tsx @@ -3,6 +3,7 @@ import type React from "react" import { useState, useEffect, useCallback } from "react" +import { fetchApi, getApiUrl, getAuthToken } from "@/lib/api-config" import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle } from "@/components/ui/dialog" import { Badge } from "@/components/ui/badge" import { Button } from "@/components/ui/button" @@ -122,10 +123,16 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu let newOverallStatus = "OK" // Use the new combined endpoint for fewer round-trips - const response = await fetch(getApiUrl("/api/health/full")) + const token = getAuthToken() + const authHeaders: Record = {} + if (token) { + authHeaders["Authorization"] = `Bearer ${token}` + } + + const response = await fetch(getApiUrl("/api/health/full"), { headers: authHeaders }) if (!response.ok) { // Fallback to legacy endpoint - const legacyResponse = await fetch(getApiUrl("/api/health/details")) + const legacyResponse = await fetch(getApiUrl("/api/health/details"), { headers: authHeaders }) if (!legacyResponse.ok) throw new Error("Failed to fetch health details") const data = await legacyResponse.json() setHealthData(data) @@ -288,15 +295,22 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu setDismissingKey(errorKey) try { - const response = await fetch(getApiUrl("/api/health/acknowledge"), { + const url = getApiUrl("/api/health/acknowledge") + const token = getAuthToken() + const headers: Record = { "Content-Type": "application/json" } + if (token) { + headers["Authorization"] = `Bearer ${token}` + } + + const response = await fetch(url, { method: "POST", - headers: { "Content-Type": "application/json" }, + headers, body: JSON.stringify({ 
error_key: errorKey }), }) if (!response.ok) { - const errorData = await response.json() - throw new Error(errorData.error || "Failed to dismiss error") + const errorData = await response.json().catch(() => ({})) + throw new Error(errorData.error || `Failed to dismiss error (${response.status})`) } await fetchHealthDetails() @@ -408,10 +422,10 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu key={checkKey} className="flex items-center justify-between gap-1.5 sm:gap-2 text-[10px] sm:text-xs py-1.5 px-2 sm:px-3 rounded-md hover:bg-muted/40 transition-colors" > -
- {getStatusIcon(checkData.status, "sm")} +
+ {getStatusIcon(checkData.status, "sm")} {formatCheckLabel(checkKey)} - {checkData.detail} + {checkData.detail} {checkData.dismissed && ( Dismissed @@ -520,8 +534,8 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
{healthData.summary && healthData.summary !== "All systems operational" && ( -
-

{healthData.summary}

+
+

{healthData.summary}

)} @@ -559,7 +573,7 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu )}
{reason && !isExpanded && ( -

{reason}

+

{reason}

)}
@@ -578,7 +592,7 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu {isExpanded && (
{reason && ( -

{reason}

+

{reason}

)} {hasChecks ? ( renderChecks(checks, key) diff --git a/AppImage/components/notification-settings.tsx b/AppImage/components/notification-settings.tsx new file mode 100644 index 00000000..6d720da3 --- /dev/null +++ b/AppImage/components/notification-settings.tsx @@ -0,0 +1,1511 @@ +"use client" + +import { useState, useEffect, useCallback } from "react" +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "./ui/card" +import { Tabs, TabsList, TabsTrigger, TabsContent } from "./ui/tabs" +import { Input } from "./ui/input" +import { Label } from "./ui/label" +import { Badge } from "./ui/badge" + +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select" +import { fetchApi } from "../lib/api-config" +import { + Bell, BellOff, Send, CheckCircle2, XCircle, Loader2, + AlertTriangle, Info, Settings2, Zap, Eye, EyeOff, + Trash2, ChevronDown, ChevronUp, ChevronRight, TestTube2, Mail, Webhook, + Copy, Server, Shield +} from "lucide-react" + +interface ChannelConfig { + enabled: boolean + bot_token?: string + chat_id?: string + url?: string + token?: string + webhook_url?: string + // Email channel fields + host?: string + port?: string + username?: string + password?: string + tls_mode?: string + from_address?: string + to_addresses?: string + subject_prefix?: string +} + +interface EventTypeInfo { + type: string + title: string + default_enabled: boolean +} + +interface NotificationConfig { + enabled: boolean + channels: Record + severity_filter: string + event_categories: Record + event_toggles: Record + event_types_by_group: Record + ai_enabled: boolean + ai_provider: string + ai_api_key: string + ai_model: string + hostname: string + webhook_secret: string + webhook_allowed_ips: string + pbs_host: string + pve_host: string + pbs_trusted_sources: string +} + +interface ServiceStatus { + enabled: boolean + running: boolean + channels: Record + queue_size: number + last_sent: string | null + total_sent_24h: number +} + 
+interface HistoryEntry { + id: number + event_type: string + channel: string + title: string + severity: string + sent_at: string + success: boolean + error_message: string | null +} + +const SEVERITY_OPTIONS = [ + { value: "critical", label: "Critical only" }, + { value: "warning", label: "Warning + Critical" }, + { value: "info", label: "All (Info + Warning + Critical)" }, +] + +const EVENT_CATEGORIES = [ + { key: "system", label: "System", desc: "Startup, shutdown, kernel events" }, + { key: "vm_ct", label: "VM / CT", desc: "Start, stop, crash, migration" }, + { key: "backup", label: "Backups", desc: "Backup start, complete, fail" }, + { key: "resources", label: "Resources", desc: "CPU, memory, temperature" }, + { key: "storage", label: "Storage", desc: "Disk space, I/O errors, SMART" }, + { key: "network", label: "Network", desc: "Connectivity, bond, latency" }, + { key: "security", label: "Security", desc: "Auth failures, fail2ban, firewall" }, + { key: "cluster", label: "Cluster", desc: "Quorum, split-brain, HA fencing" }, +] + +const AI_PROVIDERS = [ + { value: "openai", label: "OpenAI" }, + { value: "groq", label: "Groq" }, +] + +const DEFAULT_CONFIG: NotificationConfig = { + enabled: false, + channels: { + telegram: { enabled: false }, + gotify: { enabled: false }, + discord: { enabled: false }, + email: { enabled: false }, + }, + severity_filter: "all", + event_categories: { + system: true, vm_ct: true, backup: true, resources: true, + storage: true, network: true, security: true, cluster: true, + }, + event_toggles: {}, + event_types_by_group: {}, + ai_enabled: false, + ai_provider: "openai", + ai_api_key: "", + ai_model: "", + hostname: "", + webhook_secret: "", + webhook_allowed_ips: "", + pbs_host: "", + pve_host: "", + pbs_trusted_sources: "", +} + +export function NotificationSettings() { + const [config, setConfig] = useState(DEFAULT_CONFIG) + const [status, setStatus] = useState(null) + const [history, setHistory] = useState([]) + const [loading, 
setLoading] = useState(true) + const [saving, setSaving] = useState(false) + const [saved, setSaved] = useState(false) + const [testing, setTesting] = useState(null) + const [testResult, setTestResult] = useState<{ channel: string; success: boolean; message: string } | null>(null) + const [showHistory, setShowHistory] = useState(false) + const [showAdvanced, setShowAdvanced] = useState(false) + const [showSecrets, setShowSecrets] = useState>({}) + const [editMode, setEditMode] = useState(false) + const [hasChanges, setHasChanges] = useState(false) + const [expandedCategories, setExpandedCategories] = useState>(new Set()) + const [originalConfig, setOriginalConfig] = useState(DEFAULT_CONFIG) + const [webhookSetup, setWebhookSetup] = useState<{ + status: "idle" | "running" | "success" | "failed" + fallback_commands: string[] + error: string + }>({ status: "idle", fallback_commands: [], error: "" }) + + const loadConfig = useCallback(async () => { + try { + const data = await fetchApi<{ success: boolean; config: NotificationConfig }>("/api/notifications/settings") + if (data.success && data.config) { + setConfig(data.config) + setOriginalConfig(data.config) + } + } catch (err) { + console.error("Failed to load notification settings:", err) + } finally { + setLoading(false) + } + }, []) + + const loadStatus = useCallback(async () => { + try { + const data = await fetchApi<{ success: boolean } & ServiceStatus>("/api/notifications/status") + if (data.success) { + setStatus(data) + } + } catch { + // Service may not be running yet + } + }, []) + + const loadHistory = useCallback(async () => { + try { + const data = await fetchApi<{ success: boolean; history: HistoryEntry[]; total: number }>("/api/notifications/history?limit=20") + if (data.success) { + setHistory(data.history || []) + } + } catch { + // Ignore + } + }, []) + + useEffect(() => { + loadConfig() + loadStatus() + }, [loadConfig, loadStatus]) + + useEffect(() => { + if (showHistory) loadHistory() + }, 
[showHistory, loadHistory]) + + const updateConfig = (updater: (prev: NotificationConfig) => NotificationConfig) => { + setConfig(prev => { + const next = updater(prev) + setHasChanges(true) + return next + }) + } + + const updateChannel = (channel: string, field: string, value: string | boolean) => { + updateConfig(prev => ({ + ...prev, + channels: { + ...prev.channels, + [channel]: { ...prev.channels[channel], [field]: value }, + }, + })) + } + + /** Flatten the nested NotificationConfig into the flat key-value map the backend expects. */ + const flattenConfig = (cfg: NotificationConfig): Record => { + const flat: Record = { + enabled: String(cfg.enabled), + severity_filter: cfg.severity_filter, + ai_enabled: String(cfg.ai_enabled), + ai_provider: cfg.ai_provider, + ai_api_key: cfg.ai_api_key, + ai_model: cfg.ai_model, + hostname: cfg.hostname, + webhook_secret: cfg.webhook_secret, + webhook_allowed_ips: cfg.webhook_allowed_ips, + pbs_host: cfg.pbs_host, + pve_host: cfg.pve_host, + pbs_trusted_sources: cfg.pbs_trusted_sources, + } + // Flatten channels: { telegram: { enabled, bot_token, chat_id } } -> telegram.enabled, telegram.bot_token, ... + for (const [chName, chCfg] of Object.entries(cfg.channels)) { + for (const [field, value] of Object.entries(chCfg)) { + flat[`${chName}.${field}`] = String(value ?? "") + } + } + // Flatten event_categories: { system: true, backups: false } -> events.system, events.backups + for (const [cat, enabled] of Object.entries(cfg.event_categories)) { + flat[`events.${cat}`] = String(enabled) + } + // Flatten event_toggles: { vm_start: true, vm_stop: false } -> event.vm_start, event.vm_stop + // Always write ALL toggles to DB so the backend has an explicit record. + // This ensures default_enabled changes in templates don't get overridden by stale DB values. 
+ if (cfg.event_toggles) { + for (const [evt, enabled] of Object.entries(cfg.event_toggles)) { + flat[`event.${evt}`] = String(enabled) + } + } + // Also write any events NOT in event_toggles using their template defaults. + // This covers newly added templates whose default_enabled may be false. + if (cfg.event_types_by_group) { + for (const events of Object.values(cfg.event_types_by_group)) { + for (const evt of (events as Array<{type: string, default_enabled: boolean}>)) { + const key = `event.${evt.type}` + if (!(key in flat)) { + flat[key] = String(evt.default_enabled) + } + } + } + } + return flat + } + + const handleSave = async () => { + setSaving(true) + try { + // If notifications are being disabled, clean up PVE webhook first + const wasEnabled = originalConfig.enabled + const isNowDisabled = !config.enabled + + if (wasEnabled && isNowDisabled) { + try { + await fetchApi("/api/notifications/proxmox/cleanup-webhook", { method: "POST" }) + } catch { + // Non-fatal: webhook cleanup failed but we still save settings + } + } + + const payload = flattenConfig(config) + await fetchApi("/api/notifications/settings", { + method: "POST", + body: JSON.stringify(payload), + }) + setOriginalConfig(config) + setHasChanges(false) + setEditMode(false) + setSaved(true) + setTimeout(() => setSaved(false), 3000) + loadStatus() + } catch (err) { + console.error("Failed to save notification settings:", err) + } finally { + setSaving(false) + } + } + + const handleCancel = () => { + setConfig(originalConfig) + setHasChanges(false) + setEditMode(false) + } + + const handleTest = async (channel: string) => { + setTesting(channel) + setTestResult(null) + try { + // Auto-save current config before testing so backend has latest channel data + const payload = flattenConfig(config) + await fetchApi("/api/notifications/settings", { + method: "POST", + body: JSON.stringify(payload), + }) + setOriginalConfig(config) + setHasChanges(false) + + const data = await fetchApi<{ + success: 
boolean + message?: string + error?: string + results?: Record + }>("/api/notifications/test", { + method: "POST", + body: JSON.stringify({ channel }), + }) + + // Extract message from the results object if present + let message = data.message || "" + if (!message && data.results) { + const channelResult = data.results[channel] + if (channelResult) { + message = channelResult.success + ? "Test notification sent successfully" + : channelResult.error || "Test failed" + } + } + if (!message && data.error) { + message = data.error + } + if (!message) { + message = data.success ? "Test notification sent successfully" : "Test failed" + } + + setTestResult({ channel, success: data.success, message }) + } catch (err) { + setTestResult({ channel, success: false, message: String(err) }) + } finally { + setTesting(null) + setTimeout(() => setTestResult(null), 8000) + } + } + + const handleClearHistory = async () => { + try { + await fetchApi("/api/notifications/history", { method: "DELETE" }) + setHistory([]) + } catch { + // Ignore + } + } + + const toggleSecret = (key: string) => { + setShowSecrets(prev => ({ ...prev, [key]: !prev[key] })) + } + + if (loading) { + return ( + + +
+ + Notifications +
+
+ +
+
+
+ + + ) + } + + const activeChannels = Object.entries(config.channels).filter(([, ch]) => ch.enabled).length + + const handleEnable = async () => { + setSaving(true) + setWebhookSetup({ status: "running", fallback_commands: [], error: "" }) + try { + // 1) Save enabled=true + const newConfig = { ...config, enabled: true } + await fetchApi("/api/notifications/settings", { + method: "POST", + body: JSON.stringify(newConfig), + }) + setConfig(newConfig) + setOriginalConfig(newConfig) + + // 2) Auto-configure PVE webhook + try { + const setup = await fetchApi<{ + configured: boolean + secret?: string + fallback_commands?: string[] + error?: string + }>("/api/notifications/proxmox/setup-webhook", { method: "POST" }) + + if (setup.configured) { + setWebhookSetup({ status: "success", fallback_commands: [], error: "" }) + // Update secret in local config if one was generated + if (setup.secret) { + const updated = { ...newConfig, webhook_secret: setup.secret } + setConfig(updated) + setOriginalConfig(updated) + } + } else { + setWebhookSetup({ + status: "failed", + fallback_commands: setup.fallback_commands || [], + error: setup.error || "Unknown error", + }) + } + } catch { + setWebhookSetup({ + status: "failed", + fallback_commands: [], + error: "Could not reach setup endpoint", + }) + } + + setEditMode(true) + loadStatus() + } catch (err) { + console.error("Failed to enable notifications:", err) + setWebhookSetup({ status: "idle", fallback_commands: [], error: "" }) + } finally { + setSaving(false) + } + } + + // ── Disabled state: show activation card ── + if (!config.enabled && !editMode) { + return ( + + +
+ + Notifications + + Disabled + +
+ + Get real-time alerts about your Proxmox environment via Telegram, Discord, Gotify, or Email. + +
+ +
+
+
+ +
+

Enable notification service

+

+ Monitor system health, VM/CT events, backups, security alerts, and cluster status. + PVE webhook integration is configured automatically. +

+
+
+
+ +
+ + {/* Webhook setup result */} + {webhookSetup.status === "success" && ( +
+ +

+ PVE webhook configured automatically. Proxmox will send notifications to ProxMenux. +

+
+ )} + {webhookSetup.status === "failed" && ( +
+
+ +
+

+ Automatic PVE configuration failed: {webhookSetup.error} +

+

+ Notifications are enabled. Run the commands below on the PVE host to complete webhook setup. +

+
+
+ {webhookSetup.fallback_commands.length > 0 && ( +
+{webhookSetup.fallback_commands.join('\n')}
+                    
+ )} +
+ )} +
+ + {/* PBS manual section (collapsible) */} +
+ + + + Configure PBS notifications (manual) + +
+
+

+ PVE backups launched from the PVE interface are covered automatically by the PVE webhook above. +

+

+ However, PBS has its own internal jobs (Verify, Prune, GC, Sync) that generate + separate notifications. These must be configured directly on the PBS server. +

+
+
+

+ Append to /etc/proxmox-backup/notifications.cfg on the PBS host: +

+
+{`webhook: proxmenux-webhook
+\tmethod post
+\turl http://:8008/api/notifications/webhook
+
+matcher: proxmenux-pbs
+\ttarget proxmenux-webhook
+\tmatch-severity warning,error`}
+                  
+
+
+ +

+ {"Replace with the IP of this PVE node (not 127.0.0.1, unless PBS runs on the same host). Append at the end -- do not delete existing content."} +

+
+
+
+
+
+
+ ) + } + + return ( + + +
+
+ + Notifications + {config.enabled && ( + + Active + + )} +
+
+ {saved && ( + + + Saved + + )} + {editMode ? ( + <> + + + + ) : ( + + )} +
+
+ + Configure notification channels and event filters. Receive alerts via Telegram, Gotify, Discord, or Email. + +
+ + + {/* ── Service Status ── */} + {status && ( +
+
+
+ + {status.running ? "Service running" : "Service stopped"} + + {status.total_sent_24h > 0 && ( + + {status.total_sent_24h} sent in last 24h + + )} +
+ {activeChannels > 0 && ( + + {activeChannels} channel{activeChannels > 1 ? "s" : ""} + + )} +
+ )} + + {/* ── Enable/Disable ── */} +
+
+ {config.enabled ? ( + + ) : ( + + )} +
+ Enable Notifications +

Activate the notification service

+
+
+ +
+ + {config.enabled && ( + <> + {/* ── Channel Configuration ── */} +
+
+ + Channels +
+ +
+ + + + Telegram + + + Gotify + + + Discord + + + Email + + + + {/* Telegram */} + +
+ + +
+ {config.channels.telegram?.enabled && ( + <> +
+ +
+ updateChannel("telegram", "bot_token", e.target.value)} + /> + +
+
+
+ + updateChannel("telegram", "chat_id", e.target.value)} + /> +
+ {/* Per-channel action bar */} +
+ + +
+ + )} +
+ + {/* Gotify */} + +
+ + +
+ {config.channels.gotify?.enabled && ( + <> +
+ + updateChannel("gotify", "url", e.target.value)} + /> +
+
+ +
+ updateChannel("gotify", "token", e.target.value)} + /> + +
+
+ {/* Per-channel action bar */} +
+ + +
+ + )} +
+ + {/* Discord */} + +
+ + +
+ {config.channels.discord?.enabled && ( + <> +
+ +
+ updateChannel("discord", "webhook_url", e.target.value)} + /> + +
+
+ {/* Per-channel action bar */} +
+ + +
+ + )} +
+ + {/* Email */} + +
+ + +
+ {config.channels.email?.enabled && ( + <> +
+
+ + updateChannel("email", "host", e.target.value)} + /> +
+
+ + updateChannel("email", "port", e.target.value)} + /> +
+
+
+ + +
+
+
+ + updateChannel("email", "username", e.target.value)} + /> +
+
+ +
+ updateChannel("email", "password", e.target.value)} + /> + +
+
+
+
+ + updateChannel("email", "from_address", e.target.value)} + /> +
+
+ + updateChannel("email", "to_addresses", e.target.value)} + /> +
+
+ + updateChannel("email", "subject_prefix", e.target.value)} + /> +
+
+ +

+ Leave SMTP Host empty to use local sendmail (must be installed on the server). + For Gmail, use an App Password instead of your account password. +

+
+ {/* Per-channel action bar */} +
+ + +
+ + )} +
+
+ + {/* Test Result */} + {testResult && ( +
+ {testResult.success ? ( + + ) : ( + + )} + {testResult.message} +
+ )} +
{/* close bordered channel container */} +
+ + {/* ── Filters ── */} +
+
+ + Filters & Events +
+
+ {/* Severity */} +
+ + +
+ + {/* Event Categories */} +
+ +
+ {EVENT_CATEGORIES.map(cat => { + const isEnabled = config.event_categories[cat.key] ?? true + const isExpanded = expandedCategories.has(cat.key) + const eventsForGroup = config.event_types_by_group?.[cat.key] || [] + const enabledCount = eventsForGroup.filter(e => config.event_toggles?.[e.type] ?? e.default_enabled).length + + return ( +
+ {/* Category header row */} +
+ {/* Expand/collapse button */} + + + {/* Label + description */} +
+ + {cat.label} + + {cat.desc} +
+ + {/* Count badge */} + {isEnabled && eventsForGroup.length > 0 && ( + + {enabledCount}/{eventsForGroup.length} + + )} + + {/* Category toggle */} + +
+ + {/* Per-event toggles (expanded) */} + {isEnabled && isExpanded && eventsForGroup.length > 0 && ( +
+ {eventsForGroup.map(evt => { + const evtEnabled = config.event_toggles?.[evt.type] ?? evt.default_enabled + return ( +
+ + {evt.title} + + +
+ ) + })} +
+ )} +
+ ) + })} +
+
+
{/* close bordered filters container */} +
+ + {/* ── Proxmox Webhook ── */} +
+
+ + Proxmox Webhook +
+
+
+
+ PVE Webhook Configuration +
+ {!editMode && ( + + )} +
+ + {/* Setup status inline */} + {webhookSetup.status === "success" && ( +
+ +

PVE webhook configured successfully.

+
+ )} + {webhookSetup.status === "failed" && ( +
+
+ +

PVE auto-config failed: {webhookSetup.error}

+
+ {webhookSetup.fallback_commands.length > 0 && ( +
+{webhookSetup.fallback_commands.join('\n')}
+                    
+ )} +
+ )} + +
+ +
+ updateConfig(p => ({ ...p, webhook_secret: e.target.value }))} + disabled={!editMode} + /> + +
+

+ {"Used for remote connections only (e.g. PBS on another host). Local PVE webhook runs on localhost and does not need this header."} +

+
+
+ + updateConfig(p => ({ ...p, webhook_allowed_ips: e.target.value }))} + disabled={!editMode} + /> +

+ {"Localhost (127.0.0.1) is always allowed. This restricts remote callers only."} +

+
+
{/* close bordered webhook container */} + + {/* PBS manual guide (collapsible) */} +
+ + + Configure PBS notifications (manual) + +
+

+ Backups launched from PVE are covered by the PVE webhook. PBS internal jobs + (Verify, Prune, GC, Sync) require separate configuration on the PBS server. +

+

+ Append to /etc/proxmox-backup/notifications.cfg: +

+
+{`webhook: proxmenux-webhook
+\tmethod post
+\turl http://:8008/api/notifications/webhook
+
+matcher: proxmenux-pbs
+\ttarget proxmenux-webhook
+\tmatch-severity warning,error`}
+                  
+

+ {"Replace with this node's IP. Append at the end -- do not delete existing content."} +

+
+
+
+ + {/* ── Advanced: AI Enhancement ── */} +
+ + + {showAdvanced && ( +
+
+
+ AI-Enhanced Messages +

Use AI to generate contextual notification messages

+
+ +
+ + {config.ai_enabled && ( + <> +
+ + +
+
+ +
+ updateConfig(p => ({ ...p, ai_api_key: e.target.value }))} + disabled={!editMode} + /> + +
+
+
+ + updateConfig(p => ({ ...p, ai_model: e.target.value }))} + disabled={!editMode} + /> +
+
+ +

+ AI enhancement is optional. When enabled, notifications include contextual analysis and recommended actions. If the AI service is unavailable, standard templates are used as fallback. +

+
+ + )} +
+ )} +
+ + {/* ── Notification History ── */} +
+ + + {showHistory && ( +
+ {history.length === 0 ? ( +

No notifications sent yet

+ ) : ( + <> +
+ +
+
+ {history.map(entry => ( +
+ {entry.success ? ( + + ) : ( + + )} +
+ {entry.title || entry.event_type} + + {entry.channel} - {new Date(entry.sent_at).toLocaleString()} + +
+ + {entry.severity} + +
+ ))} +
+ + )} +
+ )} +
+ + )} + + {/* ── Footer info ── */} +
+ +

+ {config.enabled + ? "Notifications are active. Events matching your severity filter and category selection will be sent to configured channels." + : "Enable notifications to receive alerts about system events, health status changes, and security incidents via Telegram, Gotify, Discord, or Email."} +

+
+ + + ) +} diff --git a/AppImage/components/settings.tsx b/AppImage/components/settings.tsx index 4f037221..f2631177 100644 --- a/AppImage/components/settings.tsx +++ b/AppImage/components/settings.tsx @@ -3,6 +3,7 @@ import { useState, useEffect } from "react" import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "./ui/card" import { Wrench, Package, Ruler, HeartPulse, Cpu, MemoryStick, HardDrive, CircleDot, Network, Server, Settings2, FileText, RefreshCw, Shield, AlertTriangle, Info, Loader2, Check } from "lucide-react" +import { NotificationSettings } from "./notification-settings" import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select" import { Input } from "./ui/input" import { Badge } from "./ui/badge" @@ -438,6 +439,9 @@ export function Settings() { + {/* Notification Settings */} + + {/* ProxMenux Optimizations */} diff --git a/AppImage/components/storage-overview.tsx b/AppImage/components/storage-overview.tsx index a9a4cf79..648993b9 100644 --- a/AppImage/components/storage-overview.tsx +++ b/AppImage/components/storage-overview.tsx @@ -34,6 +34,12 @@ interface DiskInfo { wear_leveling_count?: number // SSD: Wear Leveling Count total_lbas_written?: number // SSD/NVMe: Total LBAs Written (GB) ssd_life_left?: number // SSD: SSD Life Left percentage + io_errors?: { + count: number + severity: string + sample: string + reason: string + } } interface ZFSPool { @@ -776,6 +782,17 @@ export function StorageOverview() {
+ {disk.io_errors && disk.io_errors.count > 0 && ( +
+ + {disk.io_errors.count} I/O error{disk.io_errors.count !== 1 ? 's' : ''} in 5 min +
+ )} +
{disk.size_formatted && (
@@ -841,6 +858,22 @@ export function StorageOverview() {
+ {disk.io_errors && disk.io_errors.count > 0 && ( +
+ +
+ {disk.io_errors.count} I/O error{disk.io_errors.count !== 1 ? 's' : ''} in 5 min + {disk.io_errors.sample && ( +

{disk.io_errors.sample}

+ )} +
+
+ )} +
{disk.size_formatted && (
diff --git a/AppImage/scripts/build_appimage.sh b/AppImage/scripts/build_appimage.sh index 447dd60e..3b5f3090 100644 --- a/AppImage/scripts/build_appimage.sh +++ b/AppImage/scripts/build_appimage.sh @@ -91,6 +91,11 @@ cp "$SCRIPT_DIR/proxmox_storage_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || e cp "$SCRIPT_DIR/flask_script_runner.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_script_runner.py not found" cp "$SCRIPT_DIR/security_manager.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ security_manager.py not found" cp "$SCRIPT_DIR/flask_security_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_security_routes.py not found" +cp "$SCRIPT_DIR/notification_manager.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ notification_manager.py not found" +cp "$SCRIPT_DIR/notification_channels.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ notification_channels.py not found" +cp "$SCRIPT_DIR/notification_templates.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ notification_templates.py not found" +cp "$SCRIPT_DIR/notification_events.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ notification_events.py not found" +cp "$SCRIPT_DIR/flask_notification_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_notification_routes.py not found" echo "📋 Adding translation support..." cat > "$APP_DIR/usr/bin/translate_cli.py" << 'PYEOF' diff --git a/AppImage/scripts/flask_notification_routes.py b/AppImage/scripts/flask_notification_routes.py new file mode 100644 index 00000000..c0a79c63 --- /dev/null +++ b/AppImage/scripts/flask_notification_routes.py @@ -0,0 +1,695 @@ +""" +Flask routes for notification service configuration and management. +Blueprint pattern matching flask_health_routes.py / flask_security_routes.py. 
+""" + +import hmac +import time +import hashlib +from collections import deque +from flask import Blueprint, jsonify, request +from notification_manager import notification_manager + + +# ─── Webhook Hardening Helpers ─────────────────────────────────── + +class WebhookRateLimiter: + """Simple sliding-window rate limiter for the webhook endpoint.""" + + def __init__(self, max_requests: int = 60, window_seconds: int = 60): + self._max = max_requests + self._window = window_seconds + self._timestamps: deque = deque() + + def allow(self) -> bool: + now = time.time() + # Prune entries outside the window + while self._timestamps and now - self._timestamps[0] > self._window: + self._timestamps.popleft() + if len(self._timestamps) >= self._max: + return False + self._timestamps.append(now) + return True + + +class ReplayCache: + """Bounded in-memory cache of recently seen request signatures (60s TTL).""" + + _MAX_SIZE = 2000 # Hard cap to prevent memory growth + + def __init__(self, ttl: int = 60): + self._ttl = ttl + self._seen: dict = {} # signature -> timestamp + + def check_and_record(self, signature: str) -> bool: + """Return True if this signature was already seen (replay). 
Records it otherwise.""" + now = time.time() + # Periodic cleanup + if len(self._seen) > self._MAX_SIZE // 2: + cutoff = now - self._ttl + self._seen = {k: v for k, v in self._seen.items() if v > cutoff} + if signature in self._seen and now - self._seen[signature] < self._ttl: + return True # Replay detected + self._seen[signature] = now + return False + + +# Module-level singletons (one per process) +_webhook_limiter = WebhookRateLimiter(max_requests=60, window_seconds=60) +_replay_cache = ReplayCache(ttl=60) + +# Timestamp validation window (seconds) +_TIMESTAMP_MAX_DRIFT = 60 + +notification_bp = Blueprint('notifications', __name__) + + +@notification_bp.route('/api/notifications/settings', methods=['GET']) +def get_notification_settings(): + """Get all notification settings for the UI.""" + try: + settings = notification_manager.get_settings() + return jsonify(settings) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + +@notification_bp.route('/api/notifications/settings', methods=['POST']) +def save_notification_settings(): + """Save notification settings from the UI.""" + try: + payload = request.get_json() + if not payload: + return jsonify({'error': 'No data provided'}), 400 + + result = notification_manager.save_settings(payload) + return jsonify(result) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + +@notification_bp.route('/api/notifications/test', methods=['POST']) +def test_notification(): + """Send a test notification to one or all channels.""" + try: + data = request.get_json() or {} + channel = data.get('channel', 'all') + + result = notification_manager.test_channel(channel) + return jsonify(result) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + +@notification_bp.route('/api/notifications/status', methods=['GET']) +def get_notification_status(): + """Get notification service status.""" + try: + status = notification_manager.get_status() + return jsonify(status) + except Exception as 
e: + return jsonify({'error': str(e)}), 500 + + +@notification_bp.route('/api/notifications/history', methods=['GET']) +def get_notification_history(): + """Get notification history with optional filters.""" + try: + limit = request.args.get('limit', 100, type=int) + offset = request.args.get('offset', 0, type=int) + severity = request.args.get('severity', '') + channel = request.args.get('channel', '') + + result = notification_manager.get_history(limit, offset, severity, channel) + return jsonify(result) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + +@notification_bp.route('/api/notifications/history', methods=['DELETE']) +def clear_notification_history(): + """Clear all notification history.""" + try: + result = notification_manager.clear_history() + return jsonify(result) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + +@notification_bp.route('/api/notifications/send', methods=['POST']) +def send_notification(): + """Send a notification via API (for testing or external triggers).""" + try: + data = request.get_json() + if not data: + return jsonify({'error': 'No data provided'}), 400 + + result = notification_manager.send_notification( + event_type=data.get('event_type', 'custom'), + severity=data.get('severity', 'INFO'), + title=data.get('title', ''), + message=data.get('message', ''), + data=data.get('data', {}), + source='api' + ) + return jsonify(result) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + +# ── PVE config constants ── +_PVE_ENDPOINT_ID = 'proxmenux-webhook' +_PVE_MATCHER_ID = 'proxmenux-default' +_PVE_WEBHOOK_URL = 'http://127.0.0.1:8008/api/notifications/webhook' +_PVE_NOTIFICATIONS_CFG = '/etc/pve/notifications.cfg' +_PVE_PRIV_CFG = '/etc/pve/priv/notifications.cfg' +_PVE_OUR_HEADERS = { + f'webhook: {_PVE_ENDPOINT_ID}', + f'matcher: {_PVE_MATCHER_ID}', +} + + +def _pve_read_file(path): + """Read file, return (content, error). 
Content is '' if missing.""" + try: + with open(path, 'r') as f: + return f.read(), None + except FileNotFoundError: + return '', None + except PermissionError: + return None, f'Permission denied reading {path}' + except Exception as e: + return None, str(e) + + +def _pve_backup_file(path): + """Create timestamped backup if file exists. Never fails fatally.""" + import os, shutil + from datetime import datetime + try: + if os.path.exists(path): + ts = datetime.now().strftime('%Y%m%d_%H%M%S') + backup = f"{path}.proxmenux_backup_{ts}" + shutil.copy2(path, backup) + except Exception: + pass + + +def _pve_remove_our_blocks(text, headers_to_remove): + """Remove only blocks whose header line matches one of ours. + + Preserves ALL other content byte-for-byte. + A block = header line + indented continuation lines + trailing blank line. + """ + lines = text.splitlines(keepends=True) + cleaned = [] + skip_block = False + + for line in lines: + stripped = line.strip() + + if stripped and not line[0:1].isspace() and ':' in stripped: + if stripped in headers_to_remove: + skip_block = True + continue + else: + skip_block = False + + if skip_block: + if not stripped: + skip_block = False + continue + elif line[0:1].isspace(): + continue + else: + skip_block = False + + cleaned.append(line) + + return ''.join(cleaned) + + +def _build_webhook_fallback(): + """Build fallback manual commands for webhook setup.""" + import base64 + body_tpl = '{"title":"{{ escape title }}","message":"{{ escape message }}","severity":"{{ severity }}","timestamp":"{{ timestamp }}","fields":{{ json fields }}}' + body_b64 = base64.b64encode(body_tpl.encode()).decode() + return [ + "# 1. Append to END of /etc/pve/notifications.cfg", + "# (do NOT delete existing content):", + "", + f"webhook: {_PVE_ENDPOINT_ID}", + f"\tbody {body_b64}", + f"\tmethod post", + f"\turl {_PVE_WEBHOOK_URL}", + "", + f"matcher: {_PVE_MATCHER_ID}", + f"\ttarget {_PVE_ENDPOINT_ID}", + "\tmode all", + "", + "# 2. 
Append to /etc/pve/priv/notifications.cfg :", + f"webhook: {_PVE_ENDPOINT_ID}", + ] + + +def setup_pve_webhook_core() -> dict: + """Core logic to configure PVE webhook. Callable from anywhere. + + Returns dict with 'configured', 'error', 'fallback_commands' keys. + Idempotent: safe to call multiple times. + """ + import secrets as secrets_mod + + result = { + 'configured': False, + 'endpoint_id': _PVE_ENDPOINT_ID, + 'matcher_id': _PVE_MATCHER_ID, + 'url': _PVE_WEBHOOK_URL, + 'fallback_commands': [], + 'error': None, + } + + try: + # ── Step 1: Ensure webhook secret exists (for our own internal use) ── + secret = notification_manager.get_webhook_secret() + if not secret: + secret = secrets_mod.token_urlsafe(32) + notification_manager._save_setting('webhook_secret', secret) + + # ── Step 2: Read main config ── + cfg_text, err = _pve_read_file(_PVE_NOTIFICATIONS_CFG) + if err: + result['error'] = err + result['fallback_commands'] = _build_webhook_fallback() + return result + + # ── Step 3: Read priv config (to clean up any broken blocks we wrote before) ── + priv_text, err = _pve_read_file(_PVE_PRIV_CFG) + if err: + priv_text = None + + # ── Step 4: Create backups before ANY modification ── + _pve_backup_file(_PVE_NOTIFICATIONS_CFG) + if priv_text is not None: + _pve_backup_file(_PVE_PRIV_CFG) + + # ── Step 5: Remove any previous proxmenux blocks from BOTH files ── + cleaned_cfg = _pve_remove_our_blocks(cfg_text, _PVE_OUR_HEADERS) + + if priv_text is not None: + cleaned_priv = _pve_remove_our_blocks(priv_text, _PVE_OUR_HEADERS) + + # ── Step 6: Build new blocks ── + # Exact format from a real working PVE server: + # webhook: name + # \tmethod post + # \turl http://... + # + # NO header lines -- localhost webhook doesn't need them. + # PVE header format is: header name=X-Key,value= + # PVE secret format is: secret name=key,value= + # Neither is needed for localhost calls. + + # PVE stores body as base64 in the config file. 
+ # {{ escape title/message }} -- JSON-safe escaping of quotes/newlines. + # {{ json fields }} -- renders ALL PVE metadata as a JSON object + # (type, hostname, job-id). This is a single Handlebars helper + # that always works, even if fields is empty (renders {}). + import base64 + body_template = '{"title":"{{ escape title }}","message":"{{ escape message }}","severity":"{{ severity }}","timestamp":"{{ timestamp }}","fields":{{ json fields }}}' + body_b64 = base64.b64encode(body_template.encode()).decode() + + endpoint_block = ( + f"webhook: {_PVE_ENDPOINT_ID}\n" + f"\tbody {body_b64}\n" + f"\tmethod post\n" + f"\turl {_PVE_WEBHOOK_URL}\n" + ) + + matcher_block = ( + f"matcher: {_PVE_MATCHER_ID}\n" + f"\ttarget {_PVE_ENDPOINT_ID}\n" + f"\tmode all\n" + ) + + # ── Step 7: Append our blocks to cleaned main config ── + if cleaned_cfg and not cleaned_cfg.endswith('\n'): + cleaned_cfg += '\n' + if cleaned_cfg and not cleaned_cfg.endswith('\n\n'): + cleaned_cfg += '\n' + + new_cfg = cleaned_cfg + endpoint_block + '\n' + matcher_block + + # ── Step 8: Write main config ── + try: + with open(_PVE_NOTIFICATIONS_CFG, 'w') as f: + f.write(new_cfg) + except PermissionError: + result['error'] = f'Permission denied writing {_PVE_NOTIFICATIONS_CFG}' + result['fallback_commands'] = _build_webhook_fallback() + return result + except Exception as e: + try: + with open(_PVE_NOTIFICATIONS_CFG, 'w') as f: + f.write(cfg_text) + except Exception: + pass + result['error'] = str(e) + result['fallback_commands'] = _build_webhook_fallback() + return result + + # ── Step 9: Write priv config with our webhook entry ── + # PVE REQUIRES a matching block in priv/notifications.cfg for every + # webhook endpoint, even if it has no secrets. 
Without it PVE throws: + # "Could not instantiate endpoint: private config does not exist" + priv_block = ( + f"webhook: {_PVE_ENDPOINT_ID}\n" + ) + + if priv_text is not None: + # Start from cleaned priv (our old blocks removed) + if cleaned_priv and not cleaned_priv.endswith('\n'): + cleaned_priv += '\n' + if cleaned_priv and not cleaned_priv.endswith('\n\n'): + cleaned_priv += '\n' + new_priv = cleaned_priv + priv_block + else: + new_priv = priv_block + + try: + with open(_PVE_PRIV_CFG, 'w') as f: + f.write(new_priv) + except PermissionError: + result['error'] = f'Permission denied writing {_PVE_PRIV_CFG}' + result['fallback_commands'] = _build_webhook_fallback() + return result + except Exception: + pass + + result['configured'] = True + result['secret'] = secret + return result + + except Exception as e: + result['error'] = str(e) + result['fallback_commands'] = _build_webhook_fallback() + return result + + +@notification_bp.route('/api/notifications/proxmox/setup-webhook', methods=['POST']) +def setup_proxmox_webhook(): + """HTTP endpoint wrapper for webhook setup.""" + return jsonify(setup_pve_webhook_core()), 200 + + +def cleanup_pve_webhook_core() -> dict: + """Core logic to remove PVE webhook blocks. Callable from anywhere. + + Returns dict with 'cleaned', 'error' keys. + Only removes blocks named 'proxmenux-webhook' / 'proxmenux-default'. 
+ """ + result = {'cleaned': False, 'error': None} + + try: + # Read both files + cfg_text, err = _pve_read_file(_PVE_NOTIFICATIONS_CFG) + if err: + result['error'] = err + return result + + priv_text, err = _pve_read_file(_PVE_PRIV_CFG) + if err: + priv_text = None + + # Check if our blocks actually exist before doing anything + has_our_blocks = any( + h in cfg_text for h in [f'webhook: {_PVE_ENDPOINT_ID}', f'matcher: {_PVE_MATCHER_ID}'] + ) + has_priv_blocks = priv_text and f'webhook: {_PVE_ENDPOINT_ID}' in priv_text + + if not has_our_blocks and not has_priv_blocks: + result['cleaned'] = True + return result + + # Backup before modification + _pve_backup_file(_PVE_NOTIFICATIONS_CFG) + if priv_text is not None: + _pve_backup_file(_PVE_PRIV_CFG) + + # Remove our blocks + if has_our_blocks: + cleaned_cfg = _pve_remove_our_blocks(cfg_text, _PVE_OUR_HEADERS) + try: + with open(_PVE_NOTIFICATIONS_CFG, 'w') as f: + f.write(cleaned_cfg) + except PermissionError: + result['error'] = f'Permission denied writing {_PVE_NOTIFICATIONS_CFG}' + return result + except Exception as e: + # Rollback + try: + with open(_PVE_NOTIFICATIONS_CFG, 'w') as f: + f.write(cfg_text) + except Exception: + pass + result['error'] = str(e) + return result + + if has_priv_blocks and priv_text is not None: + cleaned_priv = _pve_remove_our_blocks(priv_text, _PVE_OUR_HEADERS) + try: + with open(_PVE_PRIV_CFG, 'w') as f: + f.write(cleaned_priv) + except Exception: + pass # Best-effort + + result['cleaned'] = True + return result + + except Exception as e: + result['error'] = str(e) + return result + + +@notification_bp.route('/api/notifications/proxmox/cleanup-webhook', methods=['POST']) +def cleanup_proxmox_webhook(): + """HTTP endpoint wrapper for webhook cleanup.""" + return jsonify(cleanup_pve_webhook_core()), 200 + + +@notification_bp.route('/api/notifications/proxmox/read-cfg', methods=['GET']) +def read_pve_notification_cfg(): + """Diagnostic: return raw content of PVE notification config 
files. + + GET /api/notifications/proxmox/read-cfg + Returns both notifications.cfg and priv/notifications.cfg content. + """ + import os + + files = { + 'notifications_cfg': '/etc/pve/notifications.cfg', + 'priv_cfg': '/etc/pve/priv/notifications.cfg', + } + + # Also look for any backups we created + backup_dir = '/etc/pve' + priv_backup_dir = '/etc/pve/priv' + + result = {} + for key, path in files.items(): + try: + with open(path, 'r') as f: + result[key] = { + 'path': path, + 'content': f.read(), + 'size': os.path.getsize(path), + 'error': None, + } + except FileNotFoundError: + result[key] = {'path': path, 'content': None, 'size': 0, 'error': 'file_not_found'} + except PermissionError: + result[key] = {'path': path, 'content': None, 'size': 0, 'error': 'permission_denied'} + except Exception as e: + result[key] = {'path': path, 'content': None, 'size': 0, 'error': str(e)} + + # Find backups + backups = [] + for d in [backup_dir, priv_backup_dir]: + try: + for fname in sorted(os.listdir(d)): + if 'proxmenux_backup' in fname: + fpath = os.path.join(d, fname) + try: + with open(fpath, 'r') as f: + backups.append({ + 'path': fpath, + 'content': f.read(), + 'size': os.path.getsize(fpath), + }) + except Exception: + backups.append({'path': fpath, 'content': None, 'error': 'read_failed'}) + except Exception: + pass + + result['backups'] = backups + return jsonify(result), 200 + + +@notification_bp.route('/api/notifications/proxmox/restore-cfg', methods=['POST']) +def restore_pve_notification_cfg(): + """Restore PVE notification config from our backup. + + POST /api/notifications/proxmox/restore-cfg + Finds the most recent proxmenux_backup and restores it. 
+ """ + import os + import shutil + + files_to_restore = { + '/etc/pve': '/etc/pve/notifications.cfg', + '/etc/pve/priv': '/etc/pve/priv/notifications.cfg', + } + + restored = [] + errors = [] + + for search_dir, target_path in files_to_restore.items(): + try: + candidates = sorted([ + f for f in os.listdir(search_dir) + if 'proxmenux_backup' in f and f.startswith('notifications.cfg') + ], reverse=True) + + if candidates: + backup_path = os.path.join(search_dir, candidates[0]) + shutil.copy2(backup_path, target_path) + restored.append({'target': target_path, 'from_backup': backup_path}) + else: + errors.append({'target': target_path, 'error': 'no_backup_found'}) + except Exception as e: + errors.append({'target': target_path, 'error': str(e)}) + + return jsonify({ + 'restored': restored, + 'errors': errors, + 'success': len(errors) == 0 and len(restored) > 0, + }), 200 + + +@notification_bp.route('/api/notifications/webhook', methods=['POST']) +def proxmox_webhook(): + """Receive native Proxmox VE notification webhooks (hardened). + + Security layers: + Localhost (127.0.0.1 / ::1): rate limiting only. + PVE calls us on localhost and cannot send custom auth headers, + so we trust the loopback interface (only local processes can reach it). + Remote: rate limiting + shared secret + timestamp + replay + IP allowlist. 
+ """ + _reject = lambda code, error, status: (jsonify({'accepted': False, 'error': error}), status) + + client_ip = request.remote_addr or '' + is_localhost = client_ip in ('127.0.0.1', '::1') + + # ── Layer 1: Rate limiting (always) ── + if not _webhook_limiter.allow(): + resp = jsonify({'accepted': False, 'error': 'rate_limited'}) + resp.headers['Retry-After'] = '60' + return resp, 429 + + # ── Layers 2-5: Remote-only checks ── + if not is_localhost: + # Layer 2: Shared secret + try: + configured_secret = notification_manager.get_webhook_secret() + except Exception: + configured_secret = '' + + if configured_secret: + request_secret = request.headers.get('X-Webhook-Secret', '') + if not request_secret: + return _reject(401, 'missing_secret', 401) + if not hmac.compare_digest(configured_secret, request_secret): + return _reject(401, 'invalid_secret', 401) + + # Layer 3: Anti-replay timestamp + ts_header = request.headers.get('X-ProxMenux-Timestamp', '') + if not ts_header: + return _reject(401, 'missing_timestamp', 401) + try: + ts_value = int(ts_header) + except (ValueError, TypeError): + return _reject(401, 'invalid_timestamp', 401) + if abs(time.time() - ts_value) > _TIMESTAMP_MAX_DRIFT: + return _reject(401, 'timestamp_expired', 401) + + # Layer 4: Replay cache + raw_body = request.get_data(as_text=True) or '' + signature = hashlib.sha256(f"{ts_value}:{raw_body}".encode(errors='replace')).hexdigest() + if _replay_cache.check_and_record(signature): + return _reject(409, 'replay_detected', 409) + + # Layer 5: IP allowlist + try: + allowed_ips = notification_manager.get_webhook_allowed_ips() + if allowed_ips and client_ip not in allowed_ips: + return _reject(403, 'forbidden_ip', 403) + except Exception: + pass + + # ── Parse and process payload ── + try: + content_type = request.content_type or '' + raw_data = request.get_data(as_text=True) or '' + + # Try JSON first + payload = request.get_json(silent=True) or {} + + # If not JSON, try form data + if not 
payload: + payload = dict(request.form) + + # If still empty, try parsing raw data as JSON (PVE may not set Content-Type) + if not payload and raw_data: + import json + try: + payload = json.loads(raw_data) + except (json.JSONDecodeError, ValueError): + # PVE's {{ message }} may contain unescaped newlines/quotes + # that break JSON. Try to repair common issues. + try: + repaired = raw_data.replace('\n', '\\n').replace('\r', '\\r') + payload = json.loads(repaired) + except (json.JSONDecodeError, ValueError): + # Try to extract fields with regex from broken JSON + import re + title_m = re.search(r'"title"\s*:\s*"([^"]*)"', raw_data) + sev_m = re.search(r'"severity"\s*:\s*"([^"]*)"', raw_data) + if title_m: + payload = { + 'title': title_m.group(1), + 'body': raw_data[:1000], + 'severity': sev_m.group(1) if sev_m else 'info', + 'source': 'proxmox_hook', + } + + # If still empty, try to salvage data from raw body + if not payload: + if raw_data: + # Last resort: treat raw text as the message body + payload = { + 'title': 'PVE Notification', + 'body': raw_data[:1000], + 'severity': 'info', + 'source': 'proxmox_hook', + } + else: + return _reject(400, 'empty_payload', 400) + + result = notification_manager.process_webhook(payload) + # Always return 200 to PVE -- a non-200 makes PVE report the webhook as broken. + # The 'accepted' field in the JSON body indicates actual processing status. 
+ return jsonify(result), 200 + except Exception as e: + # Still return 200 to avoid PVE flagging the webhook as broken + return jsonify({'accepted': False, 'error': 'internal_error', 'detail': str(e)}), 200 diff --git a/AppImage/scripts/flask_server.py b/AppImage/scripts/flask_server.py index 003d15f4..dd38090e 100644 --- a/AppImage/scripts/flask_server.py +++ b/AppImage/scripts/flask_server.py @@ -23,6 +23,7 @@ import time import threading import urllib.parse import hardware_monitor +import health_persistence import xml.etree.ElementTree as ET from datetime import datetime, timedelta from functools import wraps @@ -46,6 +47,8 @@ from flask_health_routes import health_bp # noqa: E402 from flask_auth_routes import auth_bp # noqa: E402 from flask_proxmenux_routes import proxmenux_bp # noqa: E402 from flask_security_routes import security_bp # noqa: E402 +from flask_notification_routes import notification_bp # noqa: E402 +from notification_manager import notification_manager # noqa: E402 from jwt_middleware import require_auth # noqa: E402 import auth_manager # noqa: E402 @@ -120,6 +123,7 @@ app.register_blueprint(auth_bp) app.register_blueprint(health_bp) app.register_blueprint(proxmenux_bp) app.register_blueprint(security_bp) +app.register_blueprint(notification_bp) # Initialize terminal / WebSocket routes init_terminal_routes(app) @@ -1156,19 +1160,66 @@ def get_storage_info(): 'ssd_life_left': smart_data.get('ssd_life_left') # Added } - storage_data['disk_count'] += 1 - health = smart_data.get('health', 'unknown').lower() - if health == 'healthy': - storage_data['healthy_disks'] += 1 - elif health == 'warning': - storage_data['warning_disks'] += 1 - elif health in ['critical', 'failed']: - storage_data['critical_disks'] += 1 - except Exception as e: - # print(f"Error getting disk list: {e}") pass + # Enrich physical disks with active I/O errors from health_persistence. 
+ # This is the single source of truth -- health_monitor detects ATA/SCSI/IO + # errors via dmesg, records them in health_persistence, and we read them here. + try: + active_disk_errors = health_persistence.get_active_errors(category='disks') + for err in active_disk_errors: + details = err.get('details', {}) + if isinstance(details, str): + try: + details = json.loads(details) + except (json.JSONDecodeError, TypeError): + details = {} + + err_device = details.get('disk', '') + error_count = details.get('error_count', 0) + sample = details.get('sample', '') + severity = err.get('severity', 'WARNING') + + # Match error to physical disk. + # err_device can be 'sda', 'nvme0n1', or 'ata8' (if resolution failed) + matched_disk = None + if err_device in physical_disks: + matched_disk = err_device + else: + # Try partial match: 'sda' matches disk 'sda' + for dk in physical_disks: + if dk == err_device or err_device.startswith(dk): + matched_disk = dk + break + + if matched_disk: + physical_disks[matched_disk]['io_errors'] = { + 'count': error_count, + 'severity': severity, + 'sample': sample, + 'reason': err.get('reason', ''), + } + # Override health status if I/O errors are more severe + current_health = physical_disks[matched_disk].get('health', 'unknown').lower() + if severity == 'CRITICAL' and current_health != 'critical': + physical_disks[matched_disk]['health'] = 'critical' + elif severity == 'WARNING' and current_health in ('healthy', 'unknown'): + physical_disks[matched_disk]['health'] = 'warning' + except Exception: + pass + + # Count disk health states AFTER I/O error enrichment + for disk_name, disk_info in physical_disks.items(): + storage_data['disk_count'] += 1 + health = disk_info.get('health', 'unknown').lower() + if health == 'healthy': + storage_data['healthy_disks'] += 1 + elif health == 'warning': + storage_data['warning_disks'] += 1 + elif health in ['critical', 'failed']: + storage_data['critical_disks'] += 1 + storage_data['total'] = 
round(total_disk_size_bytes / (1024**4), 1) # Get disk usage for mounted partitions @@ -7094,6 +7145,16 @@ if __name__ == '__main__': except Exception as e: print(f"[ProxMenux] Vital signs sampler failed to start: {e}") + # ── Notification Service ── + try: + notification_manager.start() + if notification_manager._enabled: + print(f"[ProxMenux] Notification service started (channels: {list(notification_manager._channels.keys())})") + else: + print("[ProxMenux] Notification service loaded (disabled - configure in Settings)") + except Exception as e: + print(f"[ProxMenux] Notification service failed to start: {e}") + # Check for SSL configuration ssl_ctx = None try: diff --git a/AppImage/scripts/health_monitor.py b/AppImage/scripts/health_monitor.py index ef381192..30eb884b 100644 --- a/AppImage/scripts/health_monitor.py +++ b/AppImage/scripts/health_monitor.py @@ -324,6 +324,13 @@ class HealthMonitor: Returns JSON structure with ALL 10 categories always present. Now includes persistent error tracking. """ + # Run cleanup on every status check so stale errors are auto-resolved + # using the user-configured Suppression Duration (single source of truth). + try: + health_persistence.cleanup_old_errors() + except Exception: + pass + active_errors = health_persistence.get_active_errors() # No need to create persistent_issues dict here, it's implicitly handled by the checks @@ -821,8 +828,20 @@ class HealthMonitor: issues = [] storage_details = {} - # Check disk usage and mount status first for critical mounts - critical_mounts = ['/'] + # Check disk usage and mount status for important mounts. + # We detect actual mountpoints dynamically rather than hard-coding. 
+ critical_mounts = set() + critical_mounts.add('/') + try: + for part in psutil.disk_partitions(all=False): + mp = part.mountpoint + # Include standard system mounts and PVE storage + if mp in ('/', '/var', '/tmp', '/boot', '/boot/efi') or \ + mp.startswith('/var/lib/vz') or mp.startswith('/mnt/'): + critical_mounts.add(mp) + except Exception: + pass + critical_mounts = sorted(critical_mounts) for mount_point in critical_mounts: try: @@ -857,9 +876,32 @@ class HealthMonitor: # Check filesystem usage only if not already flagged as critical if mount_point not in storage_details or storage_details[mount_point].get('status') == 'OK': fs_status = self._check_filesystem(mount_point) + error_key = f'disk_space_{mount_point}' if fs_status['status'] != 'OK': issues.append(f"{mount_point}: {fs_status['reason']}") storage_details[mount_point] = fs_status + # Record persistent error for notifications + usage = psutil.disk_usage(mount_point) + avail_gb = usage.free / (1024**3) + if avail_gb >= 1: + avail_str = f"{avail_gb:.1f} GiB" + else: + avail_str = f"{usage.free / (1024**2):.0f} MiB" + health_persistence.record_error( + error_key=error_key, + category='disk', + severity=fs_status['status'], + reason=f'{mount_point}: {fs_status["reason"]}', + details={ + 'mount': mount_point, + 'used': str(round(usage.percent, 1)), + 'available': avail_str, + 'dismissable': False, + } + ) + else: + # Space recovered -- clear any previous alert + health_persistence.clear_error(error_key) except Exception: pass # Silently skip if mountpoint check fails @@ -1052,16 +1094,67 @@ class HealthMonitor: return storages + def _resolve_ata_to_disk(self, ata_port: str) -> str: + """Resolve an ATA controller name (e.g. 'ata8') to a block device (e.g. 'sda'). + + Uses /sys/class/ata_port/ symlinks and /sys/block/ to find the mapping. + Falls back to parsing dmesg for 'ata8: SATA link up' -> 'sd 7:0:0:0: [sda]'. 
+ """ + if not ata_port or not ata_port.startswith('ata'): + return ata_port + + port_num = ata_port.replace('ata', '') + + # Method 1: Walk /sys/class/ata_port/ -> host -> target -> block + try: + ata_path = f'/sys/class/ata_port/{ata_port}' + if os.path.exists(ata_path): + device_path = os.path.realpath(ata_path) + # Walk up to find the SCSI host, then find block devices + # Path: /sys/devices/.../ataX/hostY/targetY:0:0/Y:0:0:0/block/sdZ + for root, dirs, files in os.walk(os.path.dirname(device_path)): + if 'block' in dirs: + block_path = os.path.join(root, 'block') + devs = os.listdir(block_path) + if devs: + return devs[0] # e.g. 'sda' + except (OSError, IOError): + pass + + # Method 2: Parse dmesg for ATA link messages + try: + result = subprocess.run( + ['dmesg', '--notime'], + capture_output=True, text=True, timeout=2 + ) + if result.returncode == 0: + # Look for "ata8: SATA link up" followed by "sd X:0:0:0: [sda]" + lines = result.stdout.split('\n') + host_num = None + for line in lines: + m = re.search(rf'{ata_port}:\s+SATA link', line) + if m: + # ata port number maps to host(N-1) typically + host_num = int(port_num) - 1 + if host_num is not None: + m2 = re.search(rf'sd\s+{host_num}:\d+:\d+:\d+:\s+\[(\w+)\]', line) + if m2: + return m2.group(1) + except (OSError, subprocess.TimeoutExpired): + pass + + return ata_port # Return original if resolution fails + def _check_disks_optimized(self) -> Dict[str, Any]: """ - Optimized disk check - always returns status. - Checks dmesg for I/O errors and SMART status. - NOTE: This function is now largely covered by _check_storage_optimized, - but kept for potential specific disk-level reporting if needed. - Currently, its primary function is to detect recent I/O errors. + Disk I/O error check -- the SINGLE source of truth for disk errors. + + Reads dmesg for I/O/ATA/SCSI errors, counts per device, records in + health_persistence, and returns status for the health dashboard. 
+ Resolves ATA controller names (ata8) to physical disks (sda). """ current_time = time.time() - disk_issues = {} + disk_results = {} # Single dict for both WARNING and CRITICAL try: # Check dmesg for I/O errors in the last 5 minutes @@ -1072,17 +1165,52 @@ class HealthMonitor: timeout=2 ) + # Collect a sample line per device for richer error messages + disk_samples = {} + if result.returncode == 0: for line in result.stdout.split('\n'): line_lower = line.lower() - if any(keyword in line_lower for keyword in ['i/o error', 'ata error', 'scsi error', 'medium error']): - # Try to extract disk name - disk_match = re.search(r'/dev/(sd[a-z]|nvme\d+n\d+)', line) - if disk_match: - disk_name = disk_match.group(1) + # Detect various disk error formats + is_disk_error = any(kw in line_lower for kw in [ + 'i/o error', 'scsi error', 'medium error', + 'failed command:', 'exception emask', + ]) + ata_match = re.search(r'(ata\d+)[\.\d]*:.*(?:error|failed|exception)', line_lower) + if ata_match: + is_disk_error = True + + if is_disk_error: + # Extract device from multiple formats + raw_device = None + for dev_re in [ + r'dev\s+(sd[a-z]+)', # dev sdb + r'\[(sd[a-z]+)\]', # [sda] + r'/dev/(sd[a-z]+)', # /dev/sda + r'(nvme\d+n\d+)', # nvme0n1 + r'device\s+(sd[a-z]+\d*)', # device sda1 + r'(ata\d+)', # ata8 (ATA controller) + ]: + dm = re.search(dev_re, line) + if dm: + raw_device = dm.group(1) + break + + if raw_device: + # Resolve ATA port to physical disk name + if raw_device.startswith('ata'): + resolved = self._resolve_ata_to_disk(raw_device) + disk_name = resolved + else: + disk_name = raw_device.rstrip('0123456789') if raw_device.startswith('sd') else raw_device + self.io_error_history[disk_name].append(current_time) + if disk_name not in disk_samples: + # Clean the sample: strip dmesg timestamp prefix + clean = re.sub(r'^\[.*?\]\s*', '', line.strip()) + disk_samples[disk_name] = clean[:200] - # Clean old history (keep errors from the last 5 minutes) + # Clean old history and 
evaluate per-disk status for disk in list(self.io_error_history.keys()): self.io_error_history[disk] = [ t for t in self.io_error_history[disk] @@ -1090,57 +1218,67 @@ class HealthMonitor: ] error_count = len(self.io_error_history[disk]) + error_key = f'disk_{disk}' + sample = disk_samples.get(disk, '') + display = f'/dev/{disk}' if not disk.startswith('/') else disk - # Report based on recent error count if error_count >= 3: - error_key = f'disk_{disk}' severity = 'CRITICAL' - reason = f'{error_count} I/O errors in 5 minutes' + reason = f'{display}: {error_count} I/O errors in 5 min' + if sample: + reason += f'\n{sample}' health_persistence.record_error( error_key=error_key, category='disks', severity=severity, reason=reason, - details={'disk': disk, 'error_count': error_count, 'dismissable': False} + details={'disk': disk, 'device': display, + 'error_count': error_count, + 'sample': sample, 'dismissable': False} ) - - disk_details[disk] = { + disk_results[display] = { 'status': severity, 'reason': reason, - 'dismissable': False + 'device': disk, + 'error_count': error_count, + 'dismissable': False, } elif error_count >= 1: - error_key = f'disk_{disk}' severity = 'WARNING' - reason = f'{error_count} I/O error(s) in 5 minutes' + reason = f'{display}: {error_count} I/O error(s) in 5 min' + if sample: + reason += f'\n{sample}' - health_persistence.record_error( + rec_result = health_persistence.record_error( error_key=error_key, category='disks', severity=severity, reason=reason, - details={'disk': disk, 'error_count': error_count, 'dismissable': True} + details={'disk': disk, 'device': display, + 'error_count': error_count, + 'sample': sample, 'dismissable': True} ) - - disk_issues[f'/dev/{disk}'] = { - 'status': severity, - 'reason': reason, - 'dismissable': True - } + if not rec_result or rec_result.get('type') != 'skipped_acknowledged': + disk_results[display] = { + 'status': severity, + 'reason': reason, + 'device': disk, + 'error_count': error_count, + 
'dismissable': True, + } else: - error_key = f'disk_{disk}' health_persistence.resolve_error(error_key, 'Disk errors cleared') - if not disk_issues: + if not disk_results: return {'status': 'OK'} - has_critical = any(d.get('status') == 'CRITICAL' for d in disk_issues.values()) + has_critical = any(d.get('status') == 'CRITICAL' for d in disk_results.values()) return { 'status': 'CRITICAL' if has_critical else 'WARNING', - 'reason': f"{len(disk_issues)} disk(s) with recent errors", - 'details': disk_issues + 'reason': f"{len(disk_results)} disk(s) with recent errors", + 'details': disk_results } except Exception as e: @@ -1351,12 +1489,51 @@ class HealthMonitor: except Exception: return {'status': 'UNKNOWN', 'reason': 'Ping command failed'} + def _is_vzdump_active(self) -> bool: + """Check if a vzdump (backup) job is currently running.""" + try: + with open('/var/log/pve/tasks/active', 'r') as f: + for line in f: + if ':vzdump:' in line: + return True + except (OSError, IOError): + pass + return False + + def _resolve_vm_name(self, vmid: str) -> str: + """Resolve VMID to guest name from PVE config files.""" + if not vmid: + return '' + for base in ['/etc/pve/qemu-server', '/etc/pve/lxc']: + conf = os.path.join(base, f'{vmid}.conf') + try: + with open(conf) as f: + for line in f: + if line.startswith('hostname:') or line.startswith('name:'): + return line.split(':', 1)[1].strip() + except (OSError, IOError): + continue + return '' + def _check_vms_cts_optimized(self) -> Dict[str, Any]: """ Optimized VM/CT check - detects qmp failures and startup errors from logs. Improved detection of container and VM errors from journalctl. """ try: + # First: auto-resolve any persisted VM/CT errors where the guest + # is now running. This clears stale "Failed to start" / QMP + # errors that are no longer relevant. 
+ try: + active_vm_errors = health_persistence.get_active_errors('vms') + for err in active_vm_errors: + details = err.get('details') or {} + vmid = details.get('id', '') + if vmid: + health_persistence.check_vm_running(vmid) + except Exception: + pass + issues = [] vm_details = {} @@ -1367,20 +1544,28 @@ class HealthMonitor: timeout=3 ) + # Check if vzdump is running -- QMP timeouts during backup are normal + _vzdump_running = self._is_vzdump_active() + if result.returncode == 0: for line in result.stdout.split('\n'): line_lower = line.lower() vm_qmp_match = re.search(r'vm\s+(\d+)\s+qmp\s+command.*(?:failed|unable|timeout)', line_lower) if vm_qmp_match: + if _vzdump_running: + continue # Normal during backup vmid = vm_qmp_match.group(1) + vm_name = self._resolve_vm_name(vmid) + display = f"VM {vmid} ({vm_name})" if vm_name else f"VM {vmid}" key = f'vm_{vmid}' if key not in vm_details: - issues.append(f'VM {vmid}: Communication issue') + issues.append(f'{display}: QMP communication issue') vm_details[key] = { 'status': 'WARNING', - 'reason': 'QMP command timeout', + 'reason': f'{display}: QMP command failed or timed out.\n{line.strip()[:200]}', 'id': vmid, + 'vmname': vm_name, 'type': 'VM' } continue @@ -1401,11 +1586,15 @@ class HealthMonitor: else: reason = 'Container error' - issues.append(f'CT {ctid}: {reason}') + ct_name = self._resolve_vm_name(ctid) + display = f"CT {ctid} ({ct_name})" if ct_name else f"CT {ctid}" + full_reason = f'{display}: {reason}\n{line.strip()[:200]}' + issues.append(f'{display}: {reason}') vm_details[key] = { 'status': 'WARNING' if 'device' in reason.lower() else 'CRITICAL', - 'reason': reason, + 'reason': full_reason, 'id': ctid, + 'vmname': ct_name, 'type': 'CT' } continue @@ -1440,11 +1629,15 @@ class HealthMonitor: vmid = id_match.group(1) key = f'vmct_{vmid}' if key not in vm_details: - issues.append(f'VM/CT {vmid}: Failed to start') + vm_name = self._resolve_vm_name(vmid) + display = f"VM/CT {vmid} ({vm_name})" if vm_name else 
f"VM/CT {vmid}" + full_reason = f'{display}: Failed to start\n{line.strip()[:200]}' + issues.append(f'{display}: Failed to start') vm_details[key] = { 'status': 'CRITICAL', - 'reason': 'Failed to start', + 'reason': full_reason, 'id': vmid, + 'vmname': vm_name, 'type': 'VM/CT' } @@ -1504,31 +1697,38 @@ class HealthMonitor: timeout=3 ) + _vzdump_running = self._is_vzdump_active() + if result.returncode == 0: for line in result.stdout.split('\n'): line_lower = line.lower() - # VM QMP errors + # VM QMP errors (skip during active backup -- normal behavior) vm_qmp_match = re.search(r'vm\s+(\d+)\s+qmp\s+command.*(?:failed|unable|timeout)', line_lower) if vm_qmp_match: + if _vzdump_running: + continue # Normal during backup vmid = vm_qmp_match.group(1) + vm_name = self._resolve_vm_name(vmid) + display = f"VM {vmid} ({vm_name})" if vm_name else f"VM {vmid}" error_key = f'vm_{vmid}' if error_key not in vm_details: - # Record persistent error - health_persistence.record_error( + rec_result = health_persistence.record_error( error_key=error_key, category='vms', severity='WARNING', - reason='QMP command timeout', - details={'id': vmid, 'type': 'VM'} + reason=f'{display}: QMP command failed or timed out.\n{line.strip()[:200]}', + details={'id': vmid, 'vmname': vm_name, 'type': 'VM'} ) - issues.append(f'VM {vmid}: Communication issue') - vm_details[error_key] = { - 'status': 'WARNING', - 'reason': 'QMP command timeout', - 'id': vmid, - 'type': 'VM' - } + if not rec_result or rec_result.get('type') != 'skipped_acknowledged': + issues.append(f'{display}: QMP communication issue') + vm_details[error_key] = { + 'status': 'WARNING', + 'reason': f'{display}: QMP command failed or timed out', + 'id': vmid, + 'vmname': vm_name, + 'type': 'VM' + } continue # Container errors (including startup issues via vzstart) @@ -1548,20 +1748,21 @@ class HealthMonitor: reason = 'Startup error' # Record persistent error - health_persistence.record_error( + rec_result = 
health_persistence.record_error( error_key=error_key, category='vms', severity='WARNING', reason=reason, details={'id': ctid, 'type': 'CT'} ) - issues.append(f'CT {ctid}: {reason}') - vm_details[error_key] = { - 'status': 'WARNING', - 'reason': reason, - 'id': ctid, - 'type': 'CT' - } + if not rec_result or rec_result.get('type') != 'skipped_acknowledged': + issues.append(f'CT {ctid}: {reason}') + vm_details[error_key] = { + 'status': 'WARNING', + 'reason': reason, + 'id': ctid, + 'type': 'CT' + } # Generic failed to start for VMs and CTs if any(keyword in line_lower for keyword in ['failed to start', 'cannot start', 'activation failed', 'start error']): @@ -1586,22 +1787,28 @@ class HealthMonitor: vm_type = 'VM/CT' if error_key not in vm_details: - reason = 'Failed to start' + vm_name = self._resolve_vm_name(vmid_ctid) + display = f"{vm_type} {vmid_ctid}" + if vm_name: + display = f"{vm_type} {vmid_ctid} ({vm_name})" + reason = f'{display}: Failed to start\n{line.strip()[:200]}' # Record persistent error - health_persistence.record_error( + rec_result = health_persistence.record_error( error_key=error_key, category='vms', severity='CRITICAL', reason=reason, - details={'id': vmid_ctid, 'type': vm_type} + details={'id': vmid_ctid, 'vmname': vm_name, 'type': vm_type} ) - issues.append(f'{vm_type} {vmid_ctid}: {reason}') - vm_details[error_key] = { - 'status': 'CRITICAL', - 'reason': reason, - 'id': vmid_ctid, - 'type': vm_type - } + if not rec_result or rec_result.get('type') != 'skipped_acknowledged': + issues.append(f'{display}: Failed to start') + vm_details[error_key] = { + 'status': 'CRITICAL', + 'reason': reason, + 'id': vmid_ctid, + 'vmname': vm_name, + 'type': vm_type + } # Build checks dict from vm_details checks = {} @@ -1692,16 +1899,23 @@ class HealthMonitor: if failed_services: reason = f'Services inactive: {", ".join(failed_services)}' - # Record each failed service in persistence + # Record each failed service in persistence, respecting dismiss + 
active_failed = [] for svc in failed_services: error_key = f'pve_service_{svc}' - health_persistence.record_error( + rec_result = health_persistence.record_error( error_key=error_key, category='pve_services', severity='CRITICAL', reason=f'PVE service {svc} is {service_details.get(svc, "inactive")}', details={'service': svc, 'state': service_details.get(svc, 'inactive')} ) + if rec_result and rec_result.get('type') == 'skipped_acknowledged': + # Mark as dismissed in checks for frontend + if svc in checks: + checks[svc]['dismissed'] = True + else: + active_failed.append(svc) # Auto-clear services that recovered for svc in services_to_check: @@ -1710,10 +1924,21 @@ class HealthMonitor: if health_persistence.is_error_active(error_key): health_persistence.clear_error(error_key) + # If all failed services are dismissed, return OK + if not active_failed: + return { + 'status': 'OK', + 'reason': None, + 'failed': [], + 'is_cluster': is_cluster, + 'services_checked': len(services_to_check), + 'checks': checks + } + return { 'status': 'CRITICAL', - 'reason': reason, - 'failed': failed_services, + 'reason': f'Services inactive: {", ".join(active_failed)}', + 'failed': active_failed, 'is_cluster': is_cluster, 'services_checked': len(services_to_check), 'checks': checks @@ -1871,7 +2096,8 @@ class HealthMonitor: self.persistent_log_patterns[pattern] = { 'count': 1, 'first_seen': current_time, - 'last_seen': current_time + 'last_seen': current_time, + 'sample': line.strip()[:200], # Original line for display } for line in previous_lines: @@ -1903,6 +2129,18 @@ class HealthMonitor: if recent_count >= 5 and recent_count >= prev_count * 4: spike_errors[pattern] = recent_count + # Helper: get human-readable samples from normalized patterns + def _get_samples(error_dict, max_items=3): + """Return list of readable sample lines for error patterns.""" + samples = [] + for pattern in list(error_dict.keys())[:max_items]: + pdata = self.persistent_log_patterns.get(pattern, {}) + sample = 
pdata.get('sample', pattern) + # Trim timestamp prefix if present (e.g. "Feb 27 16:03:35 host ") + clean = re.sub(r'^[A-Z][a-z]{2}\s+\d+\s+[\d:]+\s+\S+\s+', '', sample) + samples.append(clean[:120]) + return samples + persistent_errors = {} for pattern, data in self.persistent_log_patterns.items(): time_span = current_time - data['first_seen'] @@ -1913,12 +2151,16 @@ class HealthMonitor: pattern_hash = hashlib.md5(pattern.encode()).hexdigest()[:8] error_key = f'log_persistent_{pattern_hash}' if not health_persistence.is_error_active(error_key, category='logs'): + # Use the original sample line for the notification, + # not the normalized pattern (which has IDs replaced). + sample = data.get('sample', pattern) health_persistence.record_error( error_key=error_key, category='logs', severity='WARNING', - reason=f'Persistent error pattern detected: {pattern[:80]}', - details={'pattern': pattern, 'dismissable': True, 'occurrences': data['count']} + reason=f'Recurring error ({data["count"]}x): {sample[:150]}', + details={'pattern': pattern, 'sample': sample, + 'dismissable': True, 'occurrences': data['count']} ) patterns_to_remove = [ @@ -1940,26 +2182,33 @@ class HealthMonitor: reason = f'Critical error detected: {representative_error[:100]}' elif cascade_count > 0: status = 'WARNING' - reason = f'Error cascade detected: {cascade_count} pattern(s) repeating ≥15 times in 3min' + samples = _get_samples(cascading_errors, 3) + reason = f'Error cascade ({cascade_count} patterns repeating):\n' + '\n'.join(f' - {s}' for s in samples) elif spike_count > 0: status = 'WARNING' - reason = f'Error spike detected: {spike_count} pattern(s) increased 4x' + samples = _get_samples(spike_errors, 3) + reason = f'Error spike ({spike_count} patterns with 4x increase):\n' + '\n'.join(f' - {s}' for s in samples) elif persistent_count > 0: status = 'WARNING' - reason = f'Persistent errors: {persistent_count} pattern(s) recurring over 15+ minutes' + samples = _get_samples(persistent_errors, 3) + 
reason = f'Persistent errors ({persistent_count} patterns over 15+ min):\n' + '\n'.join(f' - {s}' for s in samples) else: # No significant issues found status = 'OK' reason = None # Record/clear persistent errors for each log sub-check so Dismiss works + cascade_samples = _get_samples(cascading_errors, 2) if cascade_count else [] + spike_samples = _get_samples(spike_errors, 2) if spike_count else [] + persist_samples = _get_samples(persistent_errors, 2) if persistent_count else [] + log_sub_checks = { 'log_error_cascade': {'active': cascade_count > 0, 'severity': 'WARNING', - 'reason': f'{cascade_count} pattern(s) repeating >=15 times'}, + 'reason': f'{cascade_count} pattern(s) repeating >=15 times:\n' + '\n'.join(f' - {s}' for s in cascade_samples) if cascade_count else ''}, 'log_error_spike': {'active': spike_count > 0, 'severity': 'WARNING', - 'reason': f'{spike_count} pattern(s) with 4x increase'}, + 'reason': f'{spike_count} pattern(s) with 4x increase:\n' + '\n'.join(f' - {s}' for s in spike_samples) if spike_count else ''}, 'log_persistent_errors': {'active': persistent_count > 0, 'severity': 'WARNING', - 'reason': f'{persistent_count} recurring pattern(s) over 15+ min'}, + 'reason': f'{persistent_count} recurring pattern(s) over 15+ min:\n' + '\n'.join(f' - {s}' for s in persist_samples) if persistent_count else ''}, 'log_critical_errors': {'active': unique_critical_count > 0, 'severity': 'CRITICAL', 'reason': f'{unique_critical_count} critical error(s) found', 'dismissable': False}, } @@ -2335,20 +2584,7 @@ class HealthMonitor: msg = f'{total_banned} IP(s) currently banned by Fail2Ban (jails: {jails_str})' result['status'] = 'WARNING' result['detail'] = msg - - # Record in persistence (dismissable) - health_persistence.record_error( - error_key='fail2ban', - category='security', - severity='WARNING', - reason=msg, - details={ - 'banned_count': total_banned, - 'jails': jails_with_bans, - 'banned_ips': all_banned_ips[:5], - 'dismissable': True - } - ) + # 
Persistence handled by _check_security caller via security_fail2ban key else: result['detail'] = f'Fail2Ban active ({len(jails)} jail(s), no current bans)' # Auto-resolve if previously banned IPs are now gone @@ -2456,14 +2692,60 @@ class HealthMonitor: except Exception: pass - # Determine overall security status - if issues: - # Check if any sub-check is CRITICAL - has_critical = any(c.get('status') == 'CRITICAL' for c in checks.values()) + # Persist errors and respect dismiss for each sub-check + dismissed_keys = set() + security_sub_checks = { + 'security_login_attempts': checks.get('login_attempts', {}), + 'security_certificates': checks.get('certificates', {}), + 'security_uptime': checks.get('uptime', {}), + 'security_fail2ban': checks.get('fail2ban', {}), + } + + for err_key, check_info in security_sub_checks.items(): + check_status = check_info.get('status', 'OK') + if check_status not in ('OK', 'INFO'): + is_dismissable = check_info.get('dismissable', True) + rec_result = health_persistence.record_error( + error_key=err_key, + category='security', + severity=check_status, + reason=check_info.get('detail', ''), + details={'dismissable': is_dismissable} + ) + if rec_result and rec_result.get('type') == 'skipped_acknowledged': + dismissed_keys.add(err_key) + elif health_persistence.is_error_active(err_key): + health_persistence.clear_error(err_key) + + # Rebuild issues excluding dismissed sub-checks + key_to_check = { + 'security_login_attempts': 'login_attempts', + 'security_certificates': 'certificates', + 'security_uptime': 'uptime', + 'security_fail2ban': 'fail2ban', + } + active_issues = [] + for err_key, check_name in key_to_check.items(): + if err_key in dismissed_keys: + # Mark as dismissed in checks for the frontend + if check_name in checks: + checks[check_name]['dismissed'] = True + continue + check_info = checks.get(check_name, {}) + if check_info.get('status', 'OK') not in ('OK', 'INFO'): + active_issues.append(check_info.get('detail', '')) + + # 
Determine overall security status from non-dismissed issues only + if active_issues: + has_critical = any( + c.get('status') == 'CRITICAL' + for k, c in checks.items() + if f'security_{k}' not in dismissed_keys + ) overall_status = 'CRITICAL' if has_critical else 'WARNING' return { 'status': overall_status, - 'reason': '; '.join(issues[:2]), + 'reason': '; '.join(active_issues[:2]), 'checks': checks } diff --git a/AppImage/scripts/health_persistence.py b/AppImage/scripts/health_persistence.py index 377f71da..fede9b53 100644 --- a/AppImage/scripts/health_persistence.py +++ b/AppImage/scripts/health_persistence.py @@ -25,12 +25,8 @@ from pathlib import Path class HealthPersistence: """Manages persistent health error tracking""" - # Error retention periods (seconds) - VM_ERROR_RETENTION = 48 * 3600 # 48 hours - LOG_ERROR_RETENTION = 24 * 3600 # 24 hours - DISK_ERROR_RETENTION = 48 * 3600 # 48 hours - - # Default suppression: 24 hours (user can change per-category in settings) + # Default suppression duration when no user setting exists for a category. + # Users override per-category via the Suppression Duration settings UI. 
DEFAULT_SUPPRESSION_HOURS = 24 # Mapping from error categories to settings keys @@ -114,6 +110,31 @@ class HealthPersistence: ) ''') + # Notification history table (records all sent notifications) + cursor.execute(''' + CREATE TABLE IF NOT EXISTS notification_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + event_type TEXT NOT NULL, + channel TEXT NOT NULL, + title TEXT, + message TEXT, + severity TEXT, + sent_at TEXT NOT NULL, + success INTEGER DEFAULT 1, + error_message TEXT, + source TEXT DEFAULT 'server' + ) + ''') + + # Notification cooldown persistence (survives restarts) + cursor.execute(''' + CREATE TABLE IF NOT EXISTS notification_last_sent ( + fingerprint TEXT PRIMARY KEY, + last_sent_ts INTEGER NOT NULL, + count INTEGER DEFAULT 1 + ) + ''') + # Migration: add suppression_hours column to errors if not present cursor.execute("PRAGMA table_info(errors)") columns = [col[1] for col in cursor.fetchall()] @@ -125,6 +146,9 @@ class HealthPersistence: cursor.execute('CREATE INDEX IF NOT EXISTS idx_category ON errors(category)') cursor.execute('CREATE INDEX IF NOT EXISTS idx_resolved ON errors(resolved_at)') cursor.execute('CREATE INDEX IF NOT EXISTS idx_events_error ON events(error_key)') + cursor.execute('CREATE INDEX IF NOT EXISTS idx_notif_sent_at ON notification_history(sent_at)') + cursor.execute('CREATE INDEX IF NOT EXISTS idx_notif_severity ON notification_history(severity)') + cursor.execute('CREATE INDEX IF NOT EXISTS idx_nls_ts ON notification_last_sent(last_sent_ts)') conn.commit() conn.close() @@ -468,32 +492,58 @@ class HealthPersistence: cursor = conn.cursor() now = datetime.now() + now_iso = now.isoformat() # Delete resolved errors older than 7 days cutoff_resolved = (now - timedelta(days=7)).isoformat() cursor.execute('DELETE FROM errors WHERE resolved_at < ?', (cutoff_resolved,)) - # Auto-resolve VM/CT errors older than 48h - cutoff_vm = (now - timedelta(seconds=self.VM_ERROR_RETENTION)).isoformat() - cursor.execute(''' - UPDATE errors - SET 
resolved_at = ? - WHERE category = 'vms' - AND resolved_at IS NULL - AND first_seen < ? - AND acknowledged = 0 - ''', (now.isoformat(), cutoff_vm)) + # ── Auto-resolve stale errors using Suppression Duration settings ── + # Read per-category suppression hours from user_settings. + # If the user hasn't configured a value, use DEFAULT_SUPPRESSION_HOURS. + # This is the SINGLE source of truth for auto-resolution timing. + user_settings = {} + try: + cursor.execute( + 'SELECT setting_key, setting_value FROM user_settings WHERE setting_key LIKE ?', + ('suppress_%',) + ) + for row in cursor.fetchall(): + user_settings[row[0]] = row[1] + except Exception: + pass - # Auto-resolve log errors older than 24h - cutoff_logs = (now - timedelta(seconds=self.LOG_ERROR_RETENTION)).isoformat() + for category, setting_key in self.CATEGORY_SETTING_MAP.items(): + stored = user_settings.get(setting_key) + try: + hours = int(stored) if stored else self.DEFAULT_SUPPRESSION_HOURS + except (ValueError, TypeError): + hours = self.DEFAULT_SUPPRESSION_HOURS + + # -1 means permanently suppressed -- skip auto-resolve + if hours < 0: + continue + + cutoff = (now - timedelta(hours=hours)).isoformat() + cursor.execute(''' + UPDATE errors + SET resolved_at = ? + WHERE category = ? + AND resolved_at IS NULL + AND last_seen < ? + AND acknowledged = 0 + ''', (now_iso, category, cutoff)) + + # Catch-all: auto-resolve any error from an unmapped category + # whose last_seen exceeds DEFAULT_SUPPRESSION_HOURS. + fallback_cutoff = (now - timedelta(hours=self.DEFAULT_SUPPRESSION_HOURS)).isoformat() cursor.execute(''' - UPDATE errors + UPDATE errors SET resolved_at = ? - WHERE category = 'logs' - AND resolved_at IS NULL - AND first_seen < ? + WHERE resolved_at IS NULL AND acknowledged = 0 - ''', (now.isoformat(), cutoff_logs)) + AND last_seen < ? 
+ ''', (now_iso, fallback_cutoff)) # Delete old events (>30 days) cutoff_events = (now - timedelta(days=30)).isoformat() diff --git a/AppImage/scripts/notification_channels.py b/AppImage/scripts/notification_channels.py new file mode 100644 index 00000000..9cb6255f --- /dev/null +++ b/AppImage/scripts/notification_channels.py @@ -0,0 +1,579 @@ +""" +ProxMenux Notification Channels +Provides transport adapters for Telegram, Gotify, and Discord. + +Each channel implements send() and test() with: +- Retry with exponential backoff (3 attempts) +- Request timeout of 10s +- Rate limiting (max 30 msg/min per channel) + +Author: MacRimi +""" + +import json +import time +import urllib.request +import urllib.error +import urllib.parse +from abc import ABC, abstractmethod +from collections import deque +from typing import Tuple, Optional, Dict, Any + + +# ─── Rate Limiter ──────────────────────────────────────────────── + +class RateLimiter: + """Token-bucket rate limiter: max N messages per window.""" + + def __init__(self, max_calls: int = 30, window_seconds: int = 60): + self.max_calls = max_calls + self.window = window_seconds + self._timestamps: deque = deque() + + def allow(self) -> bool: + now = time.monotonic() + while self._timestamps and now - self._timestamps[0] > self.window: + self._timestamps.popleft() + if len(self._timestamps) >= self.max_calls: + return False + self._timestamps.append(now) + return True + + def wait_time(self) -> float: + if not self._timestamps: + return 0.0 + return max(0.0, self.window - (time.monotonic() - self._timestamps[0])) + + +# ─── Base Channel ──────────────────────────────────────────────── + +class NotificationChannel(ABC): + """Abstract base for all notification channels.""" + + MAX_RETRIES = 3 + RETRY_DELAYS = [2, 4, 8] # exponential backoff seconds + REQUEST_TIMEOUT = 10 + + def __init__(self): + self._rate_limiter = RateLimiter(max_calls=30, window_seconds=60) + + @abstractmethod + def send(self, title: str, message: str, 
severity: str = 'INFO', + data: Optional[Dict] = None) -> Dict[str, Any]: + """Send a notification. Returns {success, error, channel}.""" + pass + + @abstractmethod + def test(self) -> Tuple[bool, str]: + """Send a test message. Returns (success, error_message).""" + pass + + @abstractmethod + def validate_config(self) -> Tuple[bool, str]: + """Check if config is valid without sending. Returns (valid, error).""" + pass + + def _http_request(self, url: str, data: bytes, headers: Dict[str, str], + method: str = 'POST') -> Tuple[int, str]: + """Execute HTTP request with timeout. Returns (status_code, body).""" + req = urllib.request.Request(url, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(req, timeout=self.REQUEST_TIMEOUT) as resp: + body = resp.read().decode('utf-8', errors='replace') + return resp.status, body + except urllib.error.HTTPError as e: + body = e.read().decode('utf-8', errors='replace') if e.fp else str(e) + return e.code, body + except urllib.error.URLError as e: + return 0, str(e.reason) + except Exception as e: + return 0, str(e) + + def _send_with_retry(self, send_fn) -> Dict[str, Any]: + """Wrap a send function with rate limiting and retry logic.""" + if not self._rate_limiter.allow(): + wait = self._rate_limiter.wait_time() + return { + 'success': False, + 'error': f'Rate limited. 
Retry in {wait:.0f}s', + 'rate_limited': True + } + + last_error = '' + for attempt in range(self.MAX_RETRIES): + try: + status, body = send_fn() + if 200 <= status < 300: + return {'success': True, 'error': None} + last_error = f'HTTP {status}: {body[:200]}' + except Exception as e: + last_error = str(e) + + if attempt < self.MAX_RETRIES - 1: + time.sleep(self.RETRY_DELAYS[attempt]) + + return {'success': False, 'error': last_error} + + +# ─── Telegram ──────────────────────────────────────────────────── + +class TelegramChannel(NotificationChannel): + """Telegram Bot API channel using HTML parse mode.""" + + API_BASE = 'https://api.telegram.org/bot{token}/sendMessage' + MAX_LENGTH = 4096 + + SEVERITY_ICONS = { + 'CRITICAL': '\U0001F534', # red circle + 'WARNING': '\U0001F7E1', # yellow circle + 'INFO': '\U0001F535', # blue circle + 'OK': '\U0001F7E2', # green circle + 'UNKNOWN': '\u26AA', # white circle + } + + def __init__(self, bot_token: str, chat_id: str): + super().__init__() + token = bot_token.strip() + # Strip 'bot' prefix if user included it (API_BASE already adds it) + if token.lower().startswith('bot') and ':' in token[3:]: + token = token[3:] + self.bot_token = token + self.chat_id = chat_id.strip() + + def validate_config(self) -> Tuple[bool, str]: + if not self.bot_token: + return False, 'Bot token is required' + if not self.chat_id: + return False, 'Chat ID is required' + if ':' not in self.bot_token: + return False, 'Invalid bot token format (expected BOT_ID:TOKEN)' + return True, '' + + def send(self, title: str, message: str, severity: str = 'INFO', + data: Optional[Dict] = None) -> Dict[str, Any]: + icon = self.SEVERITY_ICONS.get(severity, self.SEVERITY_ICONS['INFO']) + html_msg = f"{icon} {self._escape_html(title)}\n\n{self._escape_html(message)}" + + # Split long messages + chunks = self._split_message(html_msg) + result = {'success': True, 'error': None, 'channel': 'telegram'} + + for chunk in chunks: + res = self._send_with_retry(lambda 
c=chunk: self._post_message(c))
+            if not res['success']:
+                result = {**res, 'channel': 'telegram'}
+                break
+
+        return result
+
+    def test(self) -> Tuple[bool, str]:
+        valid, err = self.validate_config()
+        if not valid:
+            return False, err
+
+        result = self.send(
+            'ProxMenux Test',
+            'Notification service is working correctly.\nThis is a test message from ProxMenux Monitor.',
+            'INFO'
+        )
+        return result['success'], result.get('error', '')
+
+    def _post_message(self, text: str) -> Tuple[int, str]:
+        url = self.API_BASE.format(token=self.bot_token)
+        payload = json.dumps({
+            'chat_id': self.chat_id,
+            'text': text,
+            'parse_mode': 'HTML',
+            'disable_web_page_preview': True,
+        }).encode('utf-8')
+
+        return self._http_request(url, payload, {'Content-Type': 'application/json'})
+
+    def _split_message(self, text: str) -> list:
+        if len(text) <= self.MAX_LENGTH:
+            return [text]
+        chunks = []
+        while text:
+            if len(text) <= self.MAX_LENGTH:
+                chunks.append(text)
+                break
+            split_at = text.rfind('\n', 0, self.MAX_LENGTH)
+            if split_at == -1:
+                split_at = self.MAX_LENGTH
+            chunks.append(text[:split_at])
+            text = text[split_at:].lstrip('\n')
+        return chunks
+
+    @staticmethod
+    def _escape_html(text: str) -> str:
+        return (text
+                .replace('&', '&amp;')
+                .replace('<', '&lt;')
+                .replace('>', '&gt;'))
+
+
+# ─── Gotify ──────────────────────────────────────────────────────
+
+class GotifyChannel(NotificationChannel):
+    """Gotify push notification channel with priority mapping."""
+
+    PRIORITY_MAP = {
+        'OK': 1,
+        'INFO': 2,
+        'UNKNOWN': 3,
+        'WARNING': 5,
+        'CRITICAL': 10,
+    }
+
+    def __init__(self, server_url: str, app_token: str):
+        super().__init__()
+        self.server_url = server_url.rstrip('/').strip()
+        self.app_token = app_token.strip()
+
+    def validate_config(self) -> Tuple[bool, str]:
+        if not self.server_url:
+            return False, 'Server URL is required'
+        if not self.app_token:
+            return False, 'Application token is required'
+        if not self.server_url.startswith(('http://', 'https://')):
+ return False, 'Server URL must start with http:// or https://' + return True, '' + + def send(self, title: str, message: str, severity: str = 'INFO', + data: Optional[Dict] = None) -> Dict[str, Any]: + priority = self.PRIORITY_MAP.get(severity, 2) + + result = self._send_with_retry( + lambda: self._post_message(title, message, priority) + ) + result['channel'] = 'gotify' + return result + + def test(self) -> Tuple[bool, str]: + valid, err = self.validate_config() + if not valid: + return False, err + + result = self.send( + 'ProxMenux Test', + 'Notification service is working correctly.\nThis is a test message from ProxMenux Monitor.', + 'INFO' + ) + return result['success'], result.get('error', '') + + def _post_message(self, title: str, message: str, priority: int) -> Tuple[int, str]: + url = f"{self.server_url}/message?token={self.app_token}" + payload = json.dumps({ + 'title': title, + 'message': message, + 'priority': priority, + 'extras': { + 'client::display': {'contentType': 'text/markdown'} + } + }).encode('utf-8') + + return self._http_request(url, payload, {'Content-Type': 'application/json'}) + + +# ─── Discord ───────────────────────────────────────────────────── + +class DiscordChannel(NotificationChannel): + """Discord webhook channel with color-coded embeds.""" + + MAX_EMBED_DESC = 2048 + + SEVERITY_COLORS = { + 'CRITICAL': 0xED4245, # red + 'WARNING': 0xFEE75C, # yellow + 'INFO': 0x5865F2, # blurple + 'OK': 0x57F287, # green + 'UNKNOWN': 0x99AAB5, # grey + } + + def __init__(self, webhook_url: str): + super().__init__() + self.webhook_url = webhook_url.strip() + + def validate_config(self) -> Tuple[bool, str]: + if not self.webhook_url: + return False, 'Webhook URL is required' + if 'discord.com/api/webhooks/' not in self.webhook_url: + return False, 'Invalid Discord webhook URL' + return True, '' + + def send(self, title: str, message: str, severity: str = 'INFO', + data: Optional[Dict] = None) -> Dict[str, Any]: + color = 
self.SEVERITY_COLORS.get(severity, 0x5865F2) + + desc = message[:self.MAX_EMBED_DESC] if len(message) > self.MAX_EMBED_DESC else message + + embed = { + 'title': title, + 'description': desc, + 'color': color, + 'footer': {'text': 'ProxMenux Monitor'}, + 'timestamp': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()), + } + + # Use structured fields from render_template if available + rendered_fields = (data or {}).get('_rendered_fields', []) + if rendered_fields: + embed['fields'] = [ + {'name': name, 'value': val[:1024], 'inline': True} + for name, val in rendered_fields[:25] # Discord limit: 25 fields + ] + elif data: + fields = [] + if data.get('category'): + fields.append({'name': 'Category', 'value': data['category'], 'inline': True}) + if data.get('hostname'): + fields.append({'name': 'Host', 'value': data['hostname'], 'inline': True}) + if data.get('severity'): + fields.append({'name': 'Severity', 'value': data['severity'], 'inline': True}) + if fields: + embed['fields'] = fields + + result = self._send_with_retry( + lambda: self._post_webhook(embed) + ) + result['channel'] = 'discord' + return result + + def test(self) -> Tuple[bool, str]: + valid, err = self.validate_config() + if not valid: + return False, err + + result = self.send( + 'ProxMenux Test', + 'Notification service is working correctly.\nThis is a test message from ProxMenux Monitor.', + 'INFO' + ) + return result['success'], result.get('error', '') + + def _post_webhook(self, embed: Dict) -> Tuple[int, str]: + payload = json.dumps({ + 'username': 'ProxMenux', + 'embeds': [embed] + }).encode('utf-8') + + return self._http_request( + self.webhook_url, payload, {'Content-Type': 'application/json'} + ) + + +# ─── Email Channel ────────────────────────────────────────────── + +class EmailChannel(NotificationChannel): + """Email notification channel using SMTP (smtplib) or sendmail fallback. 
+ + Config keys: + host, port, username, password, tls_mode (none|starttls|ssl), + from_address, to_addresses (comma-separated), subject_prefix, timeout + """ + + def __init__(self, config: Dict[str, str]): + super().__init__() + self.host = config.get('host', '') + self.port = int(config.get('port', 587) or 587) + self.username = config.get('username', '') + self.password = config.get('password', '') + self.tls_mode = config.get('tls_mode', 'starttls') # none | starttls | ssl + self.from_address = config.get('from_address', '') + self.to_addresses = self._parse_recipients(config.get('to_addresses', '')) + self.subject_prefix = config.get('subject_prefix', '[ProxMenux]') + self.timeout = int(config.get('timeout', 10) or 10) + + @staticmethod + def _parse_recipients(raw) -> list: + if isinstance(raw, list): + return [a.strip() for a in raw if a.strip()] + return [addr.strip() for addr in str(raw).split(',') if addr.strip()] + + def validate_config(self) -> Tuple[bool, str]: + if not self.to_addresses: + return False, 'No recipients configured' + if not self.from_address: + return False, 'No from address configured' + # Must have SMTP host OR local sendmail available + if not self.host: + import os + if not os.path.exists('/usr/sbin/sendmail'): + return False, 'No SMTP host configured and /usr/sbin/sendmail not found' + return True, '' + + def send(self, title: str, message: str, severity: str = 'INFO', + data: Optional[Dict] = None) -> Dict[str, Any]: + subject = f"{self.subject_prefix} [{severity}] {title}" + + def _do_send(): + if self.host: + return self._send_smtp(subject, message, severity) + else: + return self._send_sendmail(subject, message, severity) + + return self._send_with_retry(_do_send) + + def _send_smtp(self, subject: str, body: str, severity: str) -> Tuple[int, str]: + import smtplib + from email.message import EmailMessage + + msg = EmailMessage() + msg['Subject'] = subject + msg['From'] = self.from_address + msg['To'] = ', 
'.join(self.to_addresses) + msg.set_content(body) + + # Add HTML alternative + html_body = self._format_html(subject, body, severity) + if html_body: + msg.add_alternative(html_body, subtype='html') + + try: + if self.tls_mode == 'ssl': + server = smtplib.SMTP_SSL(self.host, self.port, timeout=self.timeout) + else: + server = smtplib.SMTP(self.host, self.port, timeout=self.timeout) + if self.tls_mode == 'starttls': + server.starttls() + + if self.username and self.password: + server.login(self.username, self.password) + + server.send_message(msg) + server.quit() + return 200, 'OK' + except smtplib.SMTPAuthenticationError as e: + return 0, f'SMTP authentication failed: {e}' + except smtplib.SMTPConnectError as e: + return 0, f'SMTP connection failed: {e}' + except smtplib.SMTPException as e: + return 0, f'SMTP error: {e}' + except (OSError, TimeoutError) as e: + return 0, f'Connection error: {e}' + + def _send_sendmail(self, subject: str, body: str, severity: str) -> Tuple[int, str]: + import os + import subprocess + from email.message import EmailMessage + + sendmail = '/usr/sbin/sendmail' + if not os.path.exists(sendmail): + return 0, 'sendmail not found at /usr/sbin/sendmail' + + msg = EmailMessage() + msg['Subject'] = subject + msg['From'] = self.from_address or 'proxmenux@localhost' + msg['To'] = ', '.join(self.to_addresses) + msg.set_content(body) + + try: + proc = subprocess.run( + [sendmail, '-t', '-oi'], + input=msg.as_string(), capture_output=True, text=True, timeout=30 + ) + if proc.returncode == 0: + return 200, 'OK' + return 0, f'sendmail failed (rc={proc.returncode}): {proc.stderr[:200]}' + except subprocess.TimeoutExpired: + return 0, 'sendmail timed out after 30s' + except Exception as e: + return 0, f'sendmail error: {e}' + + @staticmethod + def _format_html(subject: str, body: str, severity: str) -> str: + """Create professional HTML email.""" + import html as html_mod + + severity_colors = {'CRITICAL': '#dc2626', 'WARNING': '#f59e0b', 'INFO': 
'#3b82f6'} + color = severity_colors.get(severity, '#6b7280') + + body_html = ''.join( + f'

{html_mod.escape(line)}

' + for line in body.split('\n') if line.strip() + ) + + return f''' + +
+
+

ProxMenux Monitor

+

{html_mod.escape(severity)} Alert

+
+
+

{html_mod.escape(subject)}

+ {body_html} +
+
+

Sent by ProxMenux Notification Service

+
+
+''' + + def test(self) -> Tuple[bool, str]: + result = self.send( + 'ProxMenux Test Notification', + 'This is a test notification from ProxMenux Monitor.\n' + 'If you received this, your email channel is working correctly.', + 'INFO' + ) + return result.get('success', False), result.get('error', '') + + +# ─── Channel Factory ───────────────────────────────────────────── + +CHANNEL_TYPES = { + 'telegram': { + 'name': 'Telegram', + 'config_keys': ['bot_token', 'chat_id'], + 'class': TelegramChannel, + }, + 'gotify': { + 'name': 'Gotify', + 'config_keys': ['url', 'token'], + 'class': GotifyChannel, + }, + 'discord': { + 'name': 'Discord', + 'config_keys': ['webhook_url'], + 'class': DiscordChannel, + }, + 'email': { + 'name': 'Email (SMTP)', + 'config_keys': ['host', 'port', 'username', 'password', 'tls_mode', + 'from_address', 'to_addresses', 'subject_prefix'], + 'class': EmailChannel, + }, +} + + +def create_channel(channel_type: str, config: Dict[str, str]) -> Optional[NotificationChannel]: + """Create a channel instance from type name and config dict. 
+ + Args: + channel_type: 'telegram', 'gotify', or 'discord' + config: Dict with channel-specific keys (see CHANNEL_TYPES) + + Returns: + Channel instance or None if creation fails + """ + try: + if channel_type == 'telegram': + return TelegramChannel( + bot_token=config.get('bot_token', ''), + chat_id=config.get('chat_id', '') + ) + elif channel_type == 'gotify': + return GotifyChannel( + server_url=config.get('url', ''), + app_token=config.get('token', '') + ) + elif channel_type == 'discord': + return DiscordChannel( + webhook_url=config.get('webhook_url', '') + ) + elif channel_type == 'email': + return EmailChannel(config) + except Exception as e: + print(f"[NotificationChannels] Failed to create {channel_type}: {e}") + return None diff --git a/AppImage/scripts/notification_events.py b/AppImage/scripts/notification_events.py new file mode 100644 index 00000000..8a47d428 --- /dev/null +++ b/AppImage/scripts/notification_events.py @@ -0,0 +1,1301 @@ +""" +ProxMenux Notification Event Watchers +Detects Proxmox events from journald, PVE task log, and health monitor. + +Architecture: +- JournalWatcher: Real-time stream of journald for critical events +- TaskWatcher: Real-time tail of /var/log/pve/tasks/index for VM/CT/backup events +- PollingCollector: Periodic poll of health_persistence pending notifications + +All watchers put events into a shared Queue consumed by NotificationManager. + +Author: MacRimi +""" + +import os +import re +import json +import time +import hashlib +import socket +import sqlite3 +import subprocess +import threading +from queue import Queue +from typing import Optional, Dict, Any, Tuple +from pathlib import Path + + +# ─── Event Object ───────────────────────────────────────────────── + +class NotificationEvent: + """Represents a detected event ready for notification dispatch. + + Fields: + event_type: Taxonomy key (e.g. 
'vm_fail', 'auth_fail', 'split_brain') + severity: INFO | WARNING | CRITICAL + data: Payload dict with context (hostname, vmid, reason, etc.) + source: Origin: journal | tasks | health | proxmox_hook | cli | api | polling + entity: What is affected: node | vm | ct | storage | disk | network | cluster | user + entity_id: Specific identifier (vmid, IP, device, pool, interface, etc.) + raw: Original payload (webhook JSON or log line), optional + fingerprint: Stable dedup key: hostname:entity:entity_id:event_type + event_id: Short hash of fingerprint for correlation + ts_epoch: time.time() at creation + ts_monotonic: time.monotonic() at creation (drift-safe for cooldown) + """ + + __slots__ = ( + 'event_type', 'severity', 'data', 'timestamp', 'source', + 'entity', 'entity_id', 'raw', + 'fingerprint', 'event_id', 'ts_epoch', 'ts_monotonic', + ) + + def __init__(self, event_type: str, severity: str = 'INFO', + data: Optional[Dict[str, Any]] = None, + source: str = 'watcher', + entity: str = 'node', entity_id: str = '', + raw: Any = None): + self.event_type = event_type + self.severity = severity + self.data = data or {} + self.source = source + self.entity = entity + self.entity_id = entity_id + self.raw = raw + self.ts_epoch = time.time() + self.ts_monotonic = time.monotonic() + self.timestamp = self.ts_epoch # backward compat + + # Build fingerprint for dedup/cooldown + hostname = self.data.get('hostname', _hostname()) + if entity_id: + fp_base = f"{hostname}:{entity}:{entity_id}:{event_type}" + else: + # When entity_id is empty, include a hash of title/body for uniqueness + reason = self.data.get('reason', self.data.get('title', '')) + stable_extra = hashlib.md5(reason.encode(errors='replace')).hexdigest()[:8] if reason else '' + fp_base = f"{hostname}:{entity}:{event_type}:{stable_extra}" + self.fingerprint = fp_base + self.event_id = hashlib.md5(fp_base.encode()).hexdigest()[:12] + + def __repr__(self): + return f"NotificationEvent({self.event_type}, 
{self.severity}, fp={self.fingerprint[:40]})" + + +def _hostname() -> str: + try: + return socket.gethostname().split('.')[0] + except Exception: + return 'proxmox' + + +# ─── Journal Watcher (Real-time) ───────────────────────────────── + +class JournalWatcher: + """Watches journald in real-time for critical system events. + + Uses 'journalctl -f -o json' subprocess to stream entries. + Detects: auth failures, kernel panics, OOM, service crashes, + disk I/O errors, split-brain, node disconnect, system shutdown, + fail2ban bans, firewall blocks, permission changes. + """ + + def __init__(self, event_queue: Queue): + self._queue = event_queue + self._running = False + self._thread: Optional[threading.Thread] = None + self._process: Optional[subprocess.Popen] = None + self._hostname = _hostname() + + # Dedup: track recent events to avoid duplicates + self._recent_events: Dict[str, float] = {} + self._dedup_window = 30 # seconds + + def start(self): + """Start the journal watcher thread.""" + if self._running: + return + self._running = True + self._thread = threading.Thread(target=self._watch_loop, daemon=True, + name='journal-watcher') + self._thread.start() + + def stop(self): + """Stop the journal watcher.""" + self._running = False + if self._process: + try: + self._process.terminate() + self._process.wait(timeout=5) + except Exception: + try: + self._process.kill() + except Exception: + pass + + def _watch_loop(self): + """Main watch loop with auto-restart on failure.""" + while self._running: + try: + self._run_journalctl() + except Exception as e: + print(f"[JournalWatcher] Error: {e}") + if self._running: + time.sleep(5) # Wait before restart + + def _run_journalctl(self): + """Run journalctl -f and process output line by line.""" + cmd = ['journalctl', '-f', '-o', 'json', '--no-pager', + '-n', '0'] # Start from now, don't replay history + + self._process = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, + text=True, bufsize=1 + ) 
+ + for line in self._process.stdout: + if not self._running: + break + line = line.strip() + if not line: + continue + try: + entry = json.loads(line) + self._process_entry(entry) + except (json.JSONDecodeError, KeyError): + # Try plain text matching as fallback + self._process_plain(line) + + if self._process: + self._process.wait() + + def _process_entry(self, entry: Dict): + """Process a parsed journald JSON entry.""" + msg = entry.get('MESSAGE', '') + if not msg or not isinstance(msg, str): + return + + unit = entry.get('_SYSTEMD_UNIT', '') + syslog_id = entry.get('SYSLOG_IDENTIFIER', '') + priority = int(entry.get('PRIORITY', 6)) + + self._check_auth_failure(msg, syslog_id, entry) + self._check_fail2ban(msg, syslog_id) + self._check_kernel_critical(msg, syslog_id, priority) + self._check_service_failure(msg, unit) + self._check_disk_io(msg, syslog_id, priority) + self._check_cluster_events(msg, syslog_id) + self._check_system_shutdown(msg, syslog_id) + self._check_permission_change(msg, syslog_id) + self._check_firewall(msg, syslog_id) + + def _process_plain(self, line: str): + """Fallback: process a plain text log line.""" + self._check_auth_failure(line, '', {}) + self._check_fail2ban(line, '') + self._check_kernel_critical(line, '', 6) + self._check_cluster_events(line, '') + self._check_system_shutdown(line, '') + + # ── Detection methods ── + + def _check_auth_failure(self, msg: str, syslog_id: str, entry: Dict): + """Detect authentication failures (SSH, PAM, PVE).""" + patterns = [ + (r'Failed password for (?:invalid user )?(\S+) from (\S+)', 'ssh'), + (r'authentication failure.*rhost=(\S+).*user=(\S+)', 'pam'), + (r'pvedaemon\[.*authentication failure.*rhost=(\S+)', 'pve'), + ] + + for pattern, service in patterns: + match = re.search(pattern, msg, re.IGNORECASE) + if match: + groups = match.groups() + if service == 'ssh': + username, source_ip = groups[0], groups[1] + elif service == 'pam': + source_ip, username = groups[0], groups[1] + else: + 
source_ip = groups[0] + username = 'unknown' + + self._emit('auth_fail', 'WARNING', { + 'source_ip': source_ip, + 'username': username, + 'service': service, + 'hostname': self._hostname, + }, entity='user', entity_id=source_ip) + return + + def _check_fail2ban(self, msg: str, syslog_id: str): + """Detect Fail2Ban IP bans.""" + if 'fail2ban' not in msg.lower() and syslog_id != 'fail2ban-server': + return + + # Ban detected + ban_match = re.search(r'Ban\s+(\S+)', msg) + if ban_match: + ip = ban_match.group(1) + jail_match = re.search(r'\[(\w+)\]', msg) + jail = jail_match.group(1) if jail_match else 'unknown' + + self._emit('ip_block', 'INFO', { + 'source_ip': ip, + 'jail': jail, + 'failures': '', + 'hostname': self._hostname, + }, entity='user', entity_id=ip) + + def _check_kernel_critical(self, msg: str, syslog_id: str, priority: int): + """Detect kernel panics, OOM, segfaults, hardware errors.""" + # Only process messages from kernel or systemd (not app-level logs) + if syslog_id and syslog_id not in ('kernel', 'systemd', 'systemd-coredump', ''): + return + + # Filter out normal kernel messages that are NOT problems + _KERNEL_NOISE = [ + r'vfio-pci\s+\S+:\s*reset', # PCI passthrough resets (normal during VM start/stop) + r'vfio-pci\s+\S+:\s*resetting', + r'entered\s+(?:promiscuous|allmulticast)\s+mode', # Network bridge ops + r'entered\s+(?:blocking|forwarding|disabled)\s+state', # Bridge STP + r'tap\d+i\d+:', # TAP interface events + r'vmbr\d+:.*port\s+\d+', # Bridge port events + ] + for noise in _KERNEL_NOISE: + if re.search(noise, msg, re.IGNORECASE): + return + + # NOTE: Disk I/O errors (ATA, SCSI, blk_update_request) are NOT handled + # here. They are detected exclusively by HealthMonitor._check_disks_optimized + # which records to health_persistence -> PollingCollector -> notification. + # This avoids duplicate notifications and ensures the health dashboard + # stays in sync with notifications. 
+ # Filesystem errors (EXT4/BTRFS/XFS/ZFS) ARE handled here because they + # indicate corruption, not just hardware I/O problems. + + critical_patterns = { + r'kernel panic': ('system_problem', 'CRITICAL', 'Kernel panic'), + r'Out of memory': ('system_problem', 'CRITICAL', 'Out of memory killer activated'), + r'segfault': ('system_problem', 'WARNING', 'Segmentation fault detected'), + r'BUG:': ('system_problem', 'CRITICAL', 'Kernel BUG detected'), + r'Call Trace:': ('system_problem', 'WARNING', 'Kernel call trace'), + r'EXT4-fs error': ('system_problem', 'CRITICAL', 'Filesystem error'), + r'BTRFS error': ('system_problem', 'CRITICAL', 'Filesystem error'), + r'XFS.*error': ('system_problem', 'CRITICAL', 'Filesystem error'), + r'ZFS.*error': ('system_problem', 'CRITICAL', 'ZFS pool error'), + r'mce:.*Hardware Error': ('system_problem', 'CRITICAL', 'Hardware error (MCE)'), + } + + for pattern, (event_type, severity, reason) in critical_patterns.items(): + if re.search(pattern, msg, re.IGNORECASE): + entity = 'node' + entity_id = '' + + # Build a context-rich reason from the journal message. 
+ enriched = reason + + if 'segfault' in pattern: + m = re.search(r'(\S+)\[(\d+)\].*segfault', msg) + proc_name = m.group(1) if m else '' + proc_pid = m.group(2) if m else '' + lib_match = re.search(r'\bin\s+(\S+)', msg) + lib_name = lib_match.group(1) if lib_match else '' + + parts = [reason] + if proc_name: + parts.append(f"Process: {proc_name}" + (f" (PID {proc_pid})" if proc_pid else '')) + if lib_name: + parts.append(f"Module: {lib_name}") + enriched = '\n'.join(parts) + + elif 'Out of memory' in pattern: + m = re.search(r'Killed process\s+(\d+)\s+\(([^)]+)\)', msg) + if m: + enriched = f"{reason}\nKilled: {m.group(2)} (PID {m.group(1)})" + else: + enriched = f"{reason}\n{msg[:300]}" + + else: + # Generic: include the raw journal message for context + enriched = f"{reason}\n{msg[:300]}" + + data = {'reason': enriched, 'hostname': self._hostname} + + self._emit(event_type, severity, data, entity=entity, entity_id=entity_id) + return + + def _check_service_failure(self, msg: str, unit: str): + """Detect critical service failures with enriched context.""" + # Filter out noise -- these are normal systemd transient units, + # not real service failures worth alerting about. 
+ _NOISE_PATTERNS = [ + r'session-\d+\.scope', # SSH/login sessions + r'user@\d+\.service', # Per-user service managers + r'user-runtime-dir@\d+', # User runtime dirs + r'systemd-coredump@', # Coredump handlers (transient) + r'run-.*\.mount', # Transient mounts + ] + for noise in _NOISE_PATTERNS: + if re.search(noise, msg) or re.search(noise, unit): + return + + service_patterns = [ + r'Failed to start (.+)', + r'Unit (\S+) (?:entered failed state|failed)', + r'(\S+)\.service: (?:Main process exited|Failed with result)', + ] + + for pattern in service_patterns: + match = re.search(pattern, msg) + if match: + service_name = match.group(1) + data = { + 'service_name': service_name, + 'reason': msg[:300], + 'hostname': self._hostname, + } + + # Enrich PVE VM/CT services with guest name and context + # pve-container@101 -> LXC container 101 + # qemu-server@100 -> QEMU VM 100 + pve_match = re.match( + r'(pve-container|qemu-server)@(\d+)', service_name) + if pve_match: + svc_type = pve_match.group(1) + vmid = pve_match.group(2) + vm_name = self._resolve_vm_name(vmid) + + if svc_type == 'pve-container': + guest_type = 'LXC container' + else: + guest_type = 'QEMU VM' + + display = f"{guest_type} {vmid}" + if vm_name: + display = f"{guest_type} {vmid} ({vm_name})" + + data['service_name'] = service_name + data['vmid'] = vmid + data['vmname'] = vm_name + data['guest_type'] = guest_type + data['display_name'] = display + data['reason'] = ( + f"{display} failed to start.\n{msg[:300]}" + ) + + self._emit('service_fail', 'WARNING', data, + entity='node', entity_id=service_name) + return + + def _resolve_vm_name(self, vmid: str) -> str: + """Try to resolve VMID to a guest name from PVE config files.""" + if not vmid: + return '' + # Check QEMU configs + for base in ['/etc/pve/qemu-server', '/etc/pve/lxc']: + conf = os.path.join(base, f'{vmid}.conf') + try: + with open(conf) as f: + for line in f: + if line.startswith('hostname:') or line.startswith('name:'): + return 
line.split(':', 1)[1].strip() + except (OSError, IOError): + continue + return '' + + def _check_disk_io(self, msg: str, syslog_id: str, priority: int): + """Detect disk I/O errors from kernel messages.""" + if syslog_id != 'kernel' and priority > 3: + return + + io_patterns = [ + r'blk_update_request: I/O error.*dev (\S+)', + r'Buffer I/O error on device (\S+)', + r'SCSI error.*sd(\w)', + r'ata\d+.*error', + ] + + for pattern in io_patterns: + match = re.search(pattern, msg) + if match: + device = match.group(1) if match.lastindex else 'unknown' + self._emit('disk_io_error', 'CRITICAL', { + 'device': device, + 'reason': msg[:200], + 'hostname': self._hostname, + }, entity='disk', entity_id=device) + return + + def _check_cluster_events(self, msg: str, syslog_id: str): + """Detect cluster split-brain and node disconnect.""" + msg_lower = msg.lower() + + # Split-brain + if any(p in msg_lower for p in ['split-brain', 'split brain', + 'fencing required', 'cluster partition']): + quorum = 'unknown' + if 'quorum' in msg_lower: + quorum = 'lost' if 'lost' in msg_lower else 'valid' + + self._emit('split_brain', 'CRITICAL', { + 'quorum': quorum, + 'reason': msg[:200], + 'hostname': self._hostname, + }, entity='cluster', entity_id=self._hostname) + return + + # Node disconnect + if (('quorum' in msg_lower and 'lost' in msg_lower) or + ('node' in msg_lower and any(w in msg_lower for w in ['left', 'offline', 'lost']))): + + node_match = re.search(r'[Nn]ode\s+(\S+)', msg) + node_name = node_match.group(1) if node_match else 'unknown' + + self._emit('node_disconnect', 'CRITICAL', { + 'node_name': node_name, + 'hostname': self._hostname, + }, entity='cluster', entity_id=node_name) + + def _check_system_shutdown(self, msg: str, syslog_id: str): + """Detect system shutdown/reboot. + + Matches multiple systemd signals that indicate the node is going down: + - "Shutting down." (systemd PID 1) + - "System is powering off." / "System is rebooting." + - "Reached target Shutdown." 
/ "Reached target Reboot." + - "Journal stopped" (very late in shutdown) + - "The system will reboot now!" / "The system will power off now!" + """ + msg_lower = msg.lower() + + # Only process systemd / logind messages + if not any(s in syslog_id for s in ('systemd', 'logind', '')): + if 'systemd' not in msg_lower: + return + + is_reboot = False + is_shutdown = False + + # Detect reboot signals + reboot_signals = [ + 'system is rebooting', + 'reached target reboot', + 'the system will reboot now', + 'starting reboot', + ] + for sig in reboot_signals: + if sig in msg_lower: + is_reboot = True + break + + # Detect shutdown/poweroff signals + if not is_reboot: + shutdown_signals = [ + 'system is powering off', + 'system is halting', + 'shutting down', + 'reached target shutdown', + 'reached target halt', + 'the system will power off now', + 'starting power-off', + 'journal stopped', + 'stopping journal service', + ] + for sig in shutdown_signals: + if sig in msg_lower: + is_shutdown = True + break + + if is_reboot: + self._emit('system_reboot', 'CRITICAL', { + 'reason': msg[:200], + 'hostname': self._hostname, + }, entity='node', entity_id='') + elif is_shutdown: + self._emit('system_shutdown', 'CRITICAL', { + 'reason': msg[:200], + 'hostname': self._hostname, + }, entity='node', entity_id='') + + def _check_permission_change(self, msg: str, syslog_id: str): + """Detect user permission changes in PVE.""" + permission_patterns = [ + (r'set permissions.*user\s+(\S+)', 'Permission changed'), + (r'user added to group.*?(\S+)', 'Added to group'), + (r'user removed from group.*?(\S+)', 'Removed from group'), + (r'ACL updated.*?(\S+)', 'ACL updated'), + (r'Role assigned.*?(\S+)', 'Role assigned'), + ] + + for pattern, action in permission_patterns: + match = re.search(pattern, msg, re.IGNORECASE) + if match: + username = match.group(1) + self._emit('user_permission_change', 'INFO', { + 'username': username, + 'change_details': action, + 'hostname': self._hostname, + }, 
entity='user', entity_id=username) + return + + def _check_firewall(self, msg: str, syslog_id: str): + """Detect firewall issues (not individual drops, but rule errors).""" + if re.search(r'pve-firewall.*(?:error|failed|unable)', msg, re.IGNORECASE): + self._emit('firewall_issue', 'WARNING', { + 'reason': msg[:200], + 'hostname': self._hostname, + }, entity='network', entity_id='') + + # ── Emit helper ── + + def _emit(self, event_type: str, severity: str, data: Dict, + entity: str = 'node', entity_id: str = ''): + """Emit event to queue with short-term deduplication (30s window).""" + event = NotificationEvent( + event_type, severity, data, source='journal', + entity=entity, entity_id=entity_id, + ) + + now = time.time() + last = self._recent_events.get(event.fingerprint, 0) + if now - last < self._dedup_window: + return # Skip duplicate within 30s window + + self._recent_events[event.fingerprint] = now + + # Cleanup old dedup entries periodically + if len(self._recent_events) > 200: + cutoff = now - self._dedup_window * 2 + self._recent_events = { + k: v for k, v in self._recent_events.items() if v > cutoff + } + + self._queue.put(event) + + +# ─── Task Watcher (Real-time) ──────────────────────────────────── + +class TaskWatcher: + """Watches /var/log/pve/tasks/index for VM/CT and backup events. + + The PVE task index file is appended when tasks start/finish. + Format: UPID:node:pid:pstart:starttime:type:id:user: + Final status is recorded when task completes. 
+ """ + + TASK_LOG = '/var/log/pve/tasks/index' + + # Map PVE task types to our event types + TASK_MAP = { + 'qmstart': ('vm_start', 'INFO'), + 'qmstop': ('vm_stop', 'INFO'), + 'qmshutdown': ('vm_shutdown', 'INFO'), + 'qmreboot': ('vm_restart', 'INFO'), + 'qmreset': ('vm_restart', 'INFO'), + 'vzstart': ('ct_start', 'INFO'), + 'vzstop': ('ct_stop', 'INFO'), + 'vzshutdown': ('ct_shutdown', 'INFO'), + 'vzreboot': ('ct_restart', 'INFO'), + 'vzdump': ('backup_start', 'INFO'), + 'qmsnapshot': ('snapshot_complete', 'INFO'), + 'vzsnapshot': ('snapshot_complete', 'INFO'), + 'qmigrate': ('migration_start', 'INFO'), + 'vzmigrate': ('migration_start', 'INFO'), + } + + def __init__(self, event_queue: Queue): + self._queue = event_queue + self._running = False + self._thread: Optional[threading.Thread] = None + self._hostname = _hostname() + self._last_position = 0 + # Cache for active vzdump detection + self._vzdump_active_cache: float = 0 # timestamp of last positive check + self._vzdump_cache_ttl = 5 # cache result for 5s + + def start(self): + if self._running: + return + self._running = True + + # Start at end of file + if os.path.exists(self.TASK_LOG): + try: + self._last_position = os.path.getsize(self.TASK_LOG) + except OSError: + self._last_position = 0 + + self._thread = threading.Thread(target=self._watch_loop, daemon=True, + name='task-watcher') + self._thread.start() + + def stop(self): + self._running = False + + def _is_vzdump_active(self) -> bool: + """Check if a vzdump (backup) job is currently running. + + Reads /var/log/pve/tasks/active which lists all running PVE tasks. + Also verifies the process is actually alive (PID check). + Result is cached for a few seconds to avoid excessive file reads. 
+ """ + now = time.time() + # Negative cache: if we recently confirmed NO vzdump, skip the check + if hasattr(self, '_vzdump_negative_cache') and \ + now - self._vzdump_negative_cache < self._vzdump_cache_ttl: + return False + # Positive cache + if now - self._vzdump_active_cache < self._vzdump_cache_ttl: + return True + + active_file = '/var/log/pve/tasks/active' + try: + with open(active_file, 'r') as f: + for line in f: + # UPID format: UPID:node:pid:pstart:starttime:type:id:user: + if ':vzdump:' in line: + # Verify the PID is still alive + parts = line.strip().split(':') + if len(parts) >= 3: + try: + pid = int(parts[2]) + os.kill(pid, 0) # Signal 0 = just check existence + self._vzdump_active_cache = now + return True + except (ValueError, ProcessLookupError, PermissionError): + pass # PID not found or not a number -- stale entry + except (OSError, IOError): + pass + + self._vzdump_negative_cache = now + return False + + def _watch_loop(self): + """Poll the task index file for new entries.""" + while self._running: + try: + if os.path.exists(self.TASK_LOG): + current_size = os.path.getsize(self.TASK_LOG) + + if current_size < self._last_position: + # File was truncated/rotated + self._last_position = 0 + + if current_size > self._last_position: + with open(self.TASK_LOG, 'r') as f: + f.seek(self._last_position) + new_lines = f.readlines() + self._last_position = f.tell() + + for line in new_lines: + self._process_task_line(line.strip()) + except Exception as e: + print(f"[TaskWatcher] Error reading task log: {e}") + + time.sleep(2) # Check every 2 seconds + + def _process_task_line(self, line: str): + """Process a single task index line. 
+ + PVE task index format (space-separated): + UPID endtime status + Where UPID = UPID:node:pid:pstart:starttime:type:id:user: + """ + if not line: + return + + parts = line.split() + if not parts: + return + + upid = parts[0] + status = parts[2] if len(parts) >= 3 else '' + + # Parse UPID + upid_parts = upid.split(':') + if len(upid_parts) < 8: + return + + task_type = upid_parts[5] + vmid = upid_parts[6] + user = upid_parts[7] + + # Get VM/CT name + vmname = self._get_vm_name(vmid) if vmid else '' + + # Map to event type + event_info = self.TASK_MAP.get(task_type) + if not event_info: + return + + event_type, default_severity = event_info + + + + # Check if task failed + is_error = status and status != 'OK' and status != '' + + if is_error: + # Override to failure event + if 'start' in event_type: + event_type = event_type.replace('_start', '_fail') + elif 'complete' in event_type: + event_type = event_type.replace('_complete', '_fail') + severity = 'CRITICAL' + elif status == 'OK': + # Task completed successfully + if event_type == 'backup_start': + event_type = 'backup_complete' + elif event_type == 'migration_start': + event_type = 'migration_complete' + severity = 'INFO' + else: + # Task just started (no status yet) + severity = default_severity + + data = { + 'vmid': vmid, + 'vmname': vmname or f'ID {vmid}', + 'hostname': self._hostname, + 'user': user, + 'reason': status if is_error else '', + 'target_node': '', + 'size': '', + 'snapshot_name': '', + } + + # Determine entity type from task type + entity = 'ct' if task_type.startswith('vz') else 'vm' + + # Backup and replication events are handled EXCLUSIVELY by the PVE + # webhook, which delivers much richer data (full logs, sizes, durations, + # filenames). TaskWatcher skips these entirely to avoid duplicates. 
+ _WEBHOOK_EXCLUSIVE = {'backup_complete', 'backup_fail', 'backup_start', + 'replication_complete', 'replication_fail'} + if event_type in _WEBHOOK_EXCLUSIVE: + return + + # Suppress VM/CT start/stop/shutdown while a vzdump is active. + # These are backup-induced operations (mode=stop), not user actions. + # Exception: if a VM/CT FAILS to start after backup, that IS important. + _BACKUP_NOISE = {'vm_start', 'vm_stop', 'vm_shutdown', 'vm_restart', + 'ct_start', 'ct_stop', 'ct_shutdown', 'ct_restart'} + if event_type in _BACKUP_NOISE and not is_error: + if self._is_vzdump_active(): + return + + self._queue.put(NotificationEvent( + event_type, severity, data, source='tasks', + entity=entity, entity_id=vmid, + )) + + def _get_vm_name(self, vmid: str) -> str: + """Try to resolve VMID to name via config files.""" + if not vmid: + return '' + + # Try QEMU + conf_path = f'/etc/pve/qemu-server/{vmid}.conf' + name = self._read_name_from_conf(conf_path) + if name: + return name + + # Try LXC + conf_path = f'/etc/pve/lxc/{vmid}.conf' + name = self._read_name_from_conf(conf_path) + if name: + return name + + return '' + + @staticmethod + def _read_name_from_conf(path: str) -> str: + """Read 'name:' or 'hostname:' from PVE config file.""" + try: + if not os.path.exists(path): + return '' + with open(path, 'r') as f: + for line in f: + if line.startswith('name:'): + return line.split(':', 1)[1].strip() + if line.startswith('hostname:'): + return line.split(':', 1)[1].strip() + except (IOError, PermissionError): + pass + return '' + + +# ─── Polling Collector ──────────────────────────────────────────── + +class PollingCollector: + """Periodic collector that polls health state independently. + + Architecture: + - Completely independent from Health Monitor's suppression system. + Suppression Duration only affects the UI health badge; it NEVER blocks + notifications. 
+ - Reads ``get_active_errors()`` (ALL active errors, even suppressed ones) + and decides when to notify based on its own 24-hour cycle. + - For *new* errors (first_seen within the last poll interval), notifies + immediately. + - For *persistent* errors (already known), re-notifies once every 24 h. + - Update checks run on their own 24-h timer and include security counts. + + Tracking is stored in ``notification_last_sent`` (same DB). + """ + + DIGEST_INTERVAL = 86400 # 24 h between re-notifications + UPDATE_CHECK_INTERVAL = 86400 # 24 h between update scans + NEW_ERROR_WINDOW = 120 # seconds – errors younger than this are "new" + + _ENTITY_MAP = { + 'cpu': ('node', ''), 'memory': ('node', ''), 'temperature': ('node', ''), + 'disk': ('storage', ''), 'network': ('network', ''), + 'pve_services': ('node', ''), 'security': ('user', ''), + 'updates': ('node', ''), 'storage': ('storage', ''), + } + + # Map health-persistence category names to our TEMPLATES event types. + # These must match keys in notification_templates.TEMPLATES exactly. 
+ _CATEGORY_TO_EVENT_TYPE = { + 'cpu': 'cpu_high', + 'memory': 'ram_high', + 'load': 'load_high', + 'temperature': 'temp_high', + 'disk': 'disk_space_low', + 'storage': 'storage_unavailable', + 'network': 'network_down', + 'pve_services': 'service_fail', + 'security': 'auth_fail', + 'updates': 'update_available', + 'zfs': 'disk_io_error', + 'smart': 'disk_io_error', + 'disks': 'disk_io_error', + 'logs': 'system_problem', + 'vms': 'system_problem', + } + + def __init__(self, event_queue: Queue, poll_interval: int = 60): + self._queue = event_queue + self._running = False + self._thread: Optional[threading.Thread] = None + self._poll_interval = poll_interval + self._hostname = _hostname() + self._last_update_check = 0 + # In-memory cache: error_key -> last notification timestamp + self._last_notified: Dict[str, float] = {} + # Track known error keys so we can detect truly new ones + self._known_errors: set = set() + self._first_poll_done = False + + def start(self): + if self._running: + return + self._running = True + self._load_last_notified() + self._thread = threading.Thread(target=self._poll_loop, daemon=True, + name='polling-collector') + self._thread.start() + + def stop(self): + self._running = False + + # ── Main loop ────────────────────────────────────────────── + + def _poll_loop(self): + """Main polling loop.""" + # Initial delay to let health monitor warm up + for _ in range(15): + if not self._running: + return + time.sleep(1) + + while self._running: + try: + self._check_persistent_health() + self._check_updates() + except Exception as e: + print(f"[PollingCollector] Error: {e}") + + for _ in range(self._poll_interval): + if not self._running: + return + time.sleep(1) + + # ── Health errors (independent of suppression) ───────────── + + def _check_persistent_health(self): + """Read ALL active errors from health_persistence and decide + whether each one warrants a notification right now. 
+ + Rules: + - A *new* error (not in _known_errors) -> notify immediately + - A *persistent* error already notified -> re-notify after 24 h + - Uses its own tracking, NOT the health monitor's needs_notification flag + """ + try: + from health_persistence import health_persistence + errors = health_persistence.get_active_errors() + except ImportError: + return + except Exception as e: + print(f"[PollingCollector] get_active_errors failed: {e}") + return + + now = time.time() + current_keys = set() + + for error in errors: + error_key = error.get('error_key', '') + if not error_key: + continue + + current_keys.add(error_key) + category = error.get('category', '') + severity = error.get('severity', 'WARNING') + reason = error.get('reason', '') + + # Determine if we should notify + is_new = error_key not in self._known_errors and self._first_poll_done + last_sent = self._last_notified.get(error_key, 0) + is_due = (now - last_sent) >= self.DIGEST_INTERVAL + + if not is_new and not is_due: + continue + + # Map to our event type + event_type = self._CATEGORY_TO_EVENT_TYPE.get(category, 'system_problem') + entity, eid = self._ENTITY_MAP.get(category, ('node', '')) + + data = { + 'hostname': self._hostname, + 'category': category, + 'reason': reason, + 'error_key': error_key, + 'severity': severity, + 'first_seen': error.get('first_seen', ''), + 'last_seen': error.get('last_seen', ''), + 'is_persistent': not is_new, + } + + # Include extra details if present + details = error.get('details') + if isinstance(details, dict): + data.update(details) + elif isinstance(details, str): + try: + data.update(json.loads(details)) + except (json.JSONDecodeError, TypeError): + pass + + self._queue.put(NotificationEvent( + event_type, severity, data, source='health', + entity=entity, entity_id=eid or error_key, + )) + + # Track that we notified + self._last_notified[error_key] = now + self._persist_last_notified(error_key, now) + + # Remove tracking for errors that resolved + resolved = 
self._known_errors - current_keys + for key in resolved: + self._last_notified.pop(key, None) + + self._known_errors = current_keys + self._first_poll_done = True + + # ── Update check (enriched) ──────────────────────────────── + + def _check_updates(self): + """Check for available system updates every 24 h. + + Enriched output: total count, security updates, PVE version hint, + and top package names. + """ + now = time.time() + if now - self._last_update_check < self.UPDATE_CHECK_INTERVAL: + return + + self._last_update_check = now + + try: + result = subprocess.run( + ['apt-get', '-s', 'upgrade'], + capture_output=True, text=True, timeout=60, + ) + if result.returncode != 0: + return + + lines = [l for l in result.stdout.split('\n') if l.startswith('Inst ')] + total = len(lines) + if total == 0: + return + + packages = [l.split()[1] for l in lines] + security = [p for p in packages if any( + kw in p.lower() for kw in ('security', 'cve', 'openssl', 'libssl') + )] + + # Also detect security updates via apt changelog / Debian-Security origin + sec_result = subprocess.run( + ['apt-get', '-s', 'upgrade', '-o', 'Dir::Etc::SourceList=/dev/null', + '-o', 'Dir::Etc::SourceParts=/dev/null'], + capture_output=True, text=True, timeout=30, + ) + # Count lines from security repo (rough heuristic) + sec_count = max(len(security), 0) + try: + sec_output = subprocess.run( + ['apt-get', '-s', '--only-upgrade', 'install'] + packages[:50], + capture_output=True, text=True, timeout=30, + ) + for line in sec_output.stdout.split('\n'): + if 'security' in line.lower() and 'Inst ' in line: + sec_count += 1 + except Exception: + pass + + # Check for PVE version upgrade + pve_packages = [p for p in packages if 'pve-' in p.lower() or 'proxmox-' in p.lower()] + + # Build display details + top_pkgs = packages[:8] + details = ', '.join(top_pkgs) + if total > 8: + details += f', ... 
+{total - 8} more' + + data = { + 'hostname': self._hostname, + 'count': str(total), + 'security_count': str(sec_count), + 'details': details, + 'packages': ', '.join(packages[:20]), + } + if pve_packages: + data['pve_packages'] = ', '.join(pve_packages) + + self._queue.put(NotificationEvent( + 'update_available', 'INFO', data, + source='polling', entity='node', entity_id='', + )) + except Exception: + pass + + # ── Persistence helpers ──────────────────────────────────── + + def _load_last_notified(self): + """Load per-error notification timestamps from DB on startup.""" + try: + db_path = Path('/usr/local/share/proxmenux/health_monitor.db') + if not db_path.exists(): + return + conn = sqlite3.connect(str(db_path), timeout=10) + conn.execute('PRAGMA journal_mode=WAL') + cursor = conn.cursor() + cursor.execute( + "SELECT fingerprint, last_sent_ts FROM notification_last_sent " + "WHERE fingerprint LIKE 'health_%'" + ) + for fp, ts in cursor.fetchall(): + error_key = fp.replace('health_', '', 1) + self._last_notified[error_key] = ts + self._known_errors.add(error_key) + conn.close() + except Exception as e: + print(f"[PollingCollector] Failed to load last_notified: {e}") + + def _persist_last_notified(self, error_key: str, ts: float): + """Save per-error notification timestamp to DB.""" + try: + db_path = Path('/usr/local/share/proxmenux/health_monitor.db') + conn = sqlite3.connect(str(db_path), timeout=10) + conn.execute('PRAGMA journal_mode=WAL') + conn.execute('PRAGMA busy_timeout=5000') + fp = f'health_{error_key}' + conn.execute(''' + INSERT OR REPLACE INTO notification_last_sent (fingerprint, last_sent_ts, count) + VALUES (?, ?, COALESCE( + (SELECT count + 1 FROM notification_last_sent WHERE fingerprint = ?), 1 + )) + ''', (fp, int(ts), fp)) + conn.commit() + conn.close() + except Exception: + pass + + +# ─── Proxmox Webhook Receiver ─────────────────────────────────── + +class ProxmoxHookWatcher: + """Receives native Proxmox VE notifications via local webhook 
endpoint. + + Configured automatically via /etc/pve/notifications.cfg (endpoint + + matcher blocks). The setup-webhook API writes these blocks on first + enable. See flask_notification_routes.py for details. + + Payload varies by source (storage, replication, cluster, PBS, apt). + This class normalizes them into NotificationEvent objects. + """ + + def __init__(self, event_queue: Queue): + self._queue = event_queue + self._hostname = _hostname() + + def process_webhook(self, payload: dict) -> dict: + """Process an incoming Proxmox webhook payload. + + The PVE webhook is the PRIMARY source for vzdump, replication, + fencing, package-updates and system-mail events. PVE sends rich + detail (full logs, sizes, durations) that TaskWatcher cannot match. + + Body template delivers: + {title, message, severity, timestamp, fields: {type, hostname, job-id}} + + Returns: {'accepted': bool, 'event_type': str, 'event_id': str} + """ + if not payload: + return {'accepted': False, 'error': 'Empty payload'} + + # ── Extract structured PVE fields ── + fields = payload.get('fields') or {} + if isinstance(fields, str): + # Edge case: {{ json fields }} rendered as string instead of dict + try: + import json + fields = json.loads(fields) + except (json.JSONDecodeError, ValueError): + fields = {} + + pve_type = fields.get('type', '').lower().strip() + pve_hostname = fields.get('hostname', self._hostname) + pve_job_id = fields.get('job-id', '') + + title = payload.get('title', '') + message = payload.get('message', payload.get('body', '')) + severity_raw = payload.get('severity', 'info').lower().strip() + timestamp = payload.get('timestamp', '') + + # ── Classify by PVE type (direct, no heuristics needed) ── + import re + event_type, entity, entity_id = self._classify_pve( + pve_type, severity_raw, title, message + ) + + # Discard meta-events + if event_type == '_skip': + return {'accepted': False, 'skipped': True, 'reason': 'Meta-event filtered'} + + severity = 
self._map_severity(severity_raw) + + # ── Build rich data dict ── + # For webhook events, PVE's `message` IS the notification body. + # It contains full vzdump logs, package lists, error details, etc. + # We pass it as 'pve_message' so templates can use it directly. + data = { + 'hostname': pve_hostname, + 'pve_type': pve_type, + 'pve_message': message, + 'pve_title': title, + 'title': title, + 'job_id': pve_job_id, + } + + # Extract VMID and VM name from message for vzdump events + if pve_type == 'vzdump' and message: + # PVE vzdump messages contain lines like: + # "INFO: Starting Backup of VM 100 (qemu)" + # "VMID Name Status Time Size Filename" + # "100 arch-linux OK 00:05:30 1.2G /path/to/file" + vmids = re.findall(r'(?:VM|CT)\s+(\d+)', message, re.IGNORECASE) + if vmids: + data['vmid'] = vmids[0] + entity_id = vmids[0] + # Try to extract VM name from the table line + name_m = re.search(r'(\d+)\s+(\S+)\s+(?:OK|ERROR|WARNINGS)', message) + if name_m: + data['vmname'] = name_m.group(2) + # Extract size from "Total size: X" + size_m = re.search(r'Total size:\s*(.+?)(?:\n|$)', message) + if size_m: + data['size'] = size_m.group(1).strip() + # Extract duration from "Total running time: X" + dur_m = re.search(r'Total running time:\s*(.+?)(?:\n|$)', message) + if dur_m: + data['duration'] = dur_m.group(1).strip() + + event = NotificationEvent( + event_type=event_type, + severity=severity, + data=data, + source='proxmox_hook', + entity=entity, + entity_id=entity_id, + raw=payload, + ) + + self._queue.put(event) + return {'accepted': True, 'event_type': event_type, 'event_id': event.event_id} + + def _classify_pve(self, pve_type: str, severity: str, + title: str, message: str) -> tuple: + """Classify using PVE's structured fields.type. + + Returns (event_type, entity, entity_id). 
+ """ + title_lower = (title or '').lower() + + # Skip overall/updates status change meta-events + if 'overall' in title_lower and ('changed' in title_lower or 'status' in title_lower): + return '_skip', '', '' + if 'updates' in title_lower and ('changed' in title_lower or 'status' in title_lower): + return '_skip', '', '' + + # ── Direct classification by PVE type ── + if pve_type == 'vzdump': + if severity in ('error', 'err'): + return 'backup_fail', 'vm', '' + return 'backup_complete', 'vm', '' + + if pve_type == 'fencing': + return 'split_brain', 'node', '' + + if pve_type == 'replication': + return 'replication_fail', 'vm', '' + + if pve_type == 'package-updates': + return 'update_available', 'node', '' + + if pve_type == 'system-mail': + return 'system_mail', 'node', '' + + # ── Fallback for unknown/empty pve_type ── + # (e.g. test notifications, future PVE event types) + msg_lower = (message or '').lower() + text = f"{title_lower} {msg_lower}" + + if 'vzdump' in text or 'backup' in text: + import re + m = re.search(r'(?:vm|ct)\s+(\d+)', text, re.IGNORECASE) + vmid = m.group(1) if m else '' + if any(w in text for w in ('fail', 'error')): + return 'backup_fail', 'vm', vmid + return 'backup_complete', 'vm', vmid + + if 'replication' in text: + return 'replication_fail', 'vm', '' + + # Generic fallback + return 'system_problem', 'node', '' + + # Old _classify removed -- replaced by _classify_pve above. + + @staticmethod + def _map_severity(raw: str) -> str: + raw_l = str(raw).lower() + if raw_l in ('critical', 'emergency', 'alert', 'crit', 'err', 'error'): + return 'CRITICAL' + if raw_l in ('warning', 'warn', 'notice'): + return 'WARNING' + return 'INFO' diff --git a/AppImage/scripts/notification_manager.py b/AppImage/scripts/notification_manager.py new file mode 100644 index 00000000..3b2bed92 --- /dev/null +++ b/AppImage/scripts/notification_manager.py @@ -0,0 +1,1283 @@ +""" +ProxMenux Notification Manager +Central orchestrator for the notification service. 
+ +Connects: +- notification_channels.py (transport: Telegram, Gotify, Discord) +- notification_templates.py (message formatting + optional AI) +- notification_events.py (event detection: Journal, Task, Polling watchers) +- health_persistence.py (DB: config storage, notification_history) + +Two interfaces consume this module: +1. Server mode: Flask imports and calls start()/stop()/send_notification() +2. CLI mode: `python3 notification_manager.py --action send --type vm_fail ...` + Scripts .sh in /usr/local/share/proxmenux/scripts call this directly. + +Author: MacRimi +""" + +import json +import os +import sys +import time +import socket +import sqlite3 +import threading +from queue import Queue, Empty +from datetime import datetime +from typing import Dict, Any, List, Optional +from pathlib import Path + +# Ensure local imports work +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +if BASE_DIR not in sys.path: + sys.path.insert(0, BASE_DIR) + +from notification_channels import create_channel, CHANNEL_TYPES +from notification_templates import ( + render_template, format_with_ai, TEMPLATES, + EVENT_GROUPS, get_event_types_by_group, get_default_enabled_events +) +from notification_events import ( + JournalWatcher, TaskWatcher, PollingCollector, NotificationEvent, + ProxmoxHookWatcher, +) + + +# ─── Constants ──────────────────────────────────────────────────── + +DB_PATH = Path('/usr/local/share/proxmenux/health_monitor.db') +SETTINGS_PREFIX = 'notification.' 
+ +# Cooldown defaults (seconds) +DEFAULT_COOLDOWNS = { + 'CRITICAL': 60, # 60s minimum (prevents storm, delivers fast) + 'WARNING': 300, # 5 min + 'INFO': 900, # 15 min + 'resources': 900, # 15 min for resource alerts + 'updates': 86400, # 24h for update notifications +} + + +# ─── Storm Protection ──────────────────────────────────────────── + +GROUP_RATE_LIMITS = { + 'security': {'max_per_minute': 5, 'max_per_hour': 30}, + 'storage': {'max_per_minute': 3, 'max_per_hour': 20}, + 'cluster': {'max_per_minute': 5, 'max_per_hour': 20}, + 'network': {'max_per_minute': 3, 'max_per_hour': 15}, + 'resources': {'max_per_minute': 3, 'max_per_hour': 20}, + 'vm_ct': {'max_per_minute': 10, 'max_per_hour': 60}, + 'backup': {'max_per_minute': 5, 'max_per_hour': 30}, + 'system': {'max_per_minute': 5, 'max_per_hour': 30}, +} + + +class GroupRateLimiter: + """Rate limiter per event group. Prevents notification storms.""" + + def __init__(self): + from collections import deque + self._deque = deque + self._minute_counts: Dict[str, Any] = {} # group -> deque[timestamp] + self._hour_counts: Dict[str, Any] = {} # group -> deque[timestamp] + + def allow(self, group: str) -> bool: + """Check if group rate limit allows this event.""" + limits = GROUP_RATE_LIMITS.get(group, GROUP_RATE_LIMITS['system']) + now = time.time() + + # Initialize if needed + if group not in self._minute_counts: + self._minute_counts[group] = self._deque() + self._hour_counts[group] = self._deque() + + # Prune old entries + minute_q = self._minute_counts[group] + hour_q = self._hour_counts[group] + while minute_q and now - minute_q[0] > 60: + minute_q.popleft() + while hour_q and now - hour_q[0] > 3600: + hour_q.popleft() + + # Check limits + if len(minute_q) >= limits['max_per_minute']: + return False + if len(hour_q) >= limits['max_per_hour']: + return False + + # Record + minute_q.append(now) + hour_q.append(now) + return True + + def get_stats(self) -> Dict[str, Dict[str, int]]: + """Return current rate stats 
per group.""" + now = time.time() + stats = {} + for group in self._minute_counts: + minute_q = self._minute_counts.get(group, []) + hour_q = self._hour_counts.get(group, []) + stats[group] = { + 'last_minute': sum(1 for t in minute_q if now - t <= 60), + 'last_hour': sum(1 for t in hour_q if now - t <= 3600), + } + return stats + + +AGGREGATION_RULES = { + 'auth_fail': {'window': 120, 'min_count': 3, 'burst_type': 'burst_auth_fail'}, + 'ip_block': {'window': 120, 'min_count': 3, 'burst_type': 'burst_ip_block'}, + 'disk_io_error': {'window': 60, 'min_count': 3, 'burst_type': 'burst_disk_io'}, + 'split_brain': {'window': 300, 'min_count': 2, 'burst_type': 'burst_cluster'}, + 'node_disconnect': {'window': 300, 'min_count': 2, 'burst_type': 'burst_cluster'}, +} + + +class BurstAggregator: + """Accumulates similar events in a time window, then sends a single summary. + + Examples: + - "Fail2Ban banned 17 IPs in 2 minutes" + - "Disk I/O errors: 34 events on /dev/sdb in 60s" + """ + + def __init__(self): + self._buckets: Dict[str, List] = {} # bucket_key -> [events] + self._deadlines: Dict[str, float] = {} # bucket_key -> flush_deadline + self._lock = threading.Lock() + + def ingest(self, event: NotificationEvent) -> Optional[NotificationEvent]: + """Add event to aggregation. 
Returns: + - None if event is being buffered (wait for window) + - Original event if not eligible for aggregation + """ + rule = AGGREGATION_RULES.get(event.event_type) + if not rule: + return event # Not aggregable, pass through + + bucket_key = f"{event.event_type}:{event.data.get('hostname', '')}" + + with self._lock: + if bucket_key not in self._buckets: + self._buckets[bucket_key] = [] + self._deadlines[bucket_key] = time.time() + rule['window'] + + self._buckets[bucket_key].append(event) + + # First event in bucket: pass through immediately so user gets fast alert + if len(self._buckets[bucket_key]) == 1: + return event + + # Subsequent events: buffer (will be flushed as summary) + return None + + def flush_expired(self) -> List[NotificationEvent]: + """Flush all buckets past their deadline. Returns summary events.""" + now = time.time() + summaries = [] + + with self._lock: + expired_keys = [k for k, d in self._deadlines.items() if now >= d] + + for key in expired_keys: + events = self._buckets.pop(key, []) + del self._deadlines[key] + + if len(events) < 2: + continue # Single event already sent on ingest, no summary needed + + rule_type = key.split(':')[0] + rule = AGGREGATION_RULES.get(rule_type, {}) + min_count = rule.get('min_count', 2) + + if len(events) < min_count: + continue # Not enough events for a summary + + summary = self._create_summary(events, rule) + if summary: + summaries.append(summary) + + return summaries + + def _create_summary(self, events: List[NotificationEvent], + rule: dict) -> Optional[NotificationEvent]: + """Create a single summary event from multiple events.""" + if not events: + return None + + first = events[0] + # Determine highest severity + sev_order = {'INFO': 0, 'WARNING': 1, 'CRITICAL': 2} + max_severity = max(events, key=lambda e: sev_order.get(e.severity, 0)).severity + + # Collect unique entity_ids + entity_ids = list(set(e.entity_id for e in events if e.entity_id)) + entity_list = ', '.join(entity_ids[:10]) if 
entity_ids else 'multiple sources' + if len(entity_ids) > 10: + entity_list += f' (+{len(entity_ids) - 10} more)' + + # Calculate window + window_secs = events[-1].ts_epoch - events[0].ts_epoch + if window_secs < 120: + window_str = f'{int(window_secs)}s' + else: + window_str = f'{int(window_secs / 60)}m' + + burst_type = rule.get('burst_type', 'burst_generic') + + data = { + 'hostname': first.data.get('hostname', socket.gethostname()), + 'count': str(len(events)), + 'window': window_str, + 'entity_list': entity_list, + 'event_type': first.event_type, + } + + return NotificationEvent( + event_type=burst_type, + severity=max_severity, + data=data, + source='aggregator', + entity=first.entity, + entity_id='burst', + ) + + +# ─── Notification Manager ───────────────────────────────────────── + +class NotificationManager: + """Central notification orchestrator. + + Manages channels, event watchers, deduplication, and dispatch. + Can run in server mode (background threads) or CLI mode (one-shot). 
+ """ + + def __init__(self): + self._channels: Dict[str, Any] = {} # channel_name -> channel_instance + self._event_queue: Queue = Queue() + self._running = False + self._config: Dict[str, str] = {} + self._enabled = False + self._lock = threading.Lock() + + # Watchers + self._journal_watcher: Optional[JournalWatcher] = None + self._task_watcher: Optional[TaskWatcher] = None + self._polling_collector: Optional[PollingCollector] = None + self._dispatch_thread: Optional[threading.Thread] = None + + # Webhook receiver (no thread, passive) + self._hook_watcher: Optional[ProxmoxHookWatcher] = None + + # Cooldown tracking: {fingerprint: last_sent_timestamp} + self._cooldowns: Dict[str, float] = {} + + # Storm protection + self._group_limiter = GroupRateLimiter() + self._aggregator = BurstAggregator() + self._aggregation_thread: Optional[threading.Thread] = None + + # Stats + self._stats = { + 'started_at': None, + 'total_sent': 0, + 'total_errors': 0, + 'last_sent_at': None, + } + + # ─── Configuration ────────────────────────────────────────── + + def _load_config(self): + """Load notification settings from the shared SQLite database.""" + self._config = {} + try: + if not DB_PATH.exists(): + return + + conn = sqlite3.connect(str(DB_PATH), timeout=10) + conn.execute('PRAGMA journal_mode=WAL') + conn.execute('PRAGMA busy_timeout=5000') + cursor = conn.cursor() + cursor.execute( + 'SELECT setting_key, setting_value FROM user_settings WHERE setting_key LIKE ?', + (f'{SETTINGS_PREFIX}%',) + ) + for key, value in cursor.fetchall(): + # Strip prefix for internal use + short_key = key[len(SETTINGS_PREFIX):] + self._config[short_key] = value + conn.close() + except Exception as e: + print(f"[NotificationManager] Failed to load config: {e}") + + # Reconcile per-event toggles with current template defaults. + # If a template's default_enabled was changed (e.g. state_change False), + # but the DB has a stale 'true' from a previous default, fix it now. 
    # Only override if the user hasn't explicitly set it (we track this with
    # a sentinel: if the value came from auto-save of defaults, it may be stale).
        for event_type, tmpl in TEMPLATES.items():
            key = f'event.{event_type}'
            if key in self._config:
                db_val = self._config[key] == 'true'
                tmpl_default = tmpl.get('default_enabled', True)
                # If template says disabled but DB says enabled, AND there's no
                # explicit user marker, enforce the template default.
                if not tmpl_default and db_val:
                    # Check if user explicitly enabled it (look for a marker)
                    # NOTE(review): only the 'event_explicit.' sentinel marks a
                    # user choice -- confirm the UI writes it on manual toggles.
                    marker = f'event_explicit.{event_type}'
                    if marker not in self._config:
                        self._config[key] = 'false'

        # Master on/off switch; disabled by default when the key is absent.
        self._enabled = self._config.get('enabled', 'false') == 'true'
        self._rebuild_channels()

    def _save_setting(self, key: str, value: str):
        """Save a single notification setting to the database.

        Stored under the 'notification.' prefix; the in-memory config
        (un-prefixed key) is updated only when the write succeeds.
        """
        full_key = f'{SETTINGS_PREFIX}{key}'
        now = datetime.now().isoformat()
        try:
            conn = sqlite3.connect(str(DB_PATH), timeout=10)
            conn.execute('PRAGMA journal_mode=WAL')
            conn.execute('PRAGMA busy_timeout=5000')
            cursor = conn.cursor()
            cursor.execute('''
                INSERT OR REPLACE INTO user_settings (setting_key, setting_value, updated_at)
                VALUES (?, ?, ?)
+ ''', (full_key, value, now)) + conn.commit() + conn.close() + self._config[key] = value + except Exception as e: + print(f"[NotificationManager] Failed to save setting {key}: {e}") + + def _rebuild_channels(self): + """Rebuild channel instances from current config.""" + self._channels = {} + + for ch_type in CHANNEL_TYPES: + enabled_key = f'{ch_type}.enabled' + if self._config.get(enabled_key) != 'true': + continue + + # Gather config keys for this channel + ch_config = {} + for config_key in CHANNEL_TYPES[ch_type]['config_keys']: + full_key = f'{ch_type}.{config_key}' + ch_config[config_key] = self._config.get(full_key, '') + + channel = create_channel(ch_type, ch_config) + if channel: + valid, err = channel.validate_config() + if valid: + self._channels[ch_type] = channel + else: + print(f"[NotificationManager] Channel {ch_type} invalid: {err}") + + def reload_config(self): + """Reload config from DB without restarting.""" + with self._lock: + self._load_config() + return {'success': True, 'channels': list(self._channels.keys())} + + # ─── Server Mode (Background) ────────────────────────────── + + def start(self): + """Start the notification service in server mode. + + Launches watchers and dispatch loop as daemon threads. + Called by flask_server.py on startup. + """ + if self._running: + return + + self._load_config() + self._load_cooldowns_from_db() + + if not self._enabled: + print("[NotificationManager] Service is disabled. 
Skipping start.") + return + + self._running = True + self._stats['started_at'] = datetime.now().isoformat() + + # Ensure PVE webhook is configured (repairs priv config if missing) + try: + from flask_notification_routes import setup_pve_webhook_core + wh_result = setup_pve_webhook_core() + if wh_result.get('configured'): + print("[NotificationManager] PVE webhook configured OK.") + elif wh_result.get('error'): + print(f"[NotificationManager] PVE webhook warning: {wh_result['error']}") + except ImportError: + pass # flask_notification_routes not loaded yet (early startup) + except Exception as e: + print(f"[NotificationManager] PVE webhook setup error: {e}") + + # Start event watchers + self._journal_watcher = JournalWatcher(self._event_queue) + self._task_watcher = TaskWatcher(self._event_queue) + self._polling_collector = PollingCollector(self._event_queue) + + self._journal_watcher.start() + self._task_watcher.start() + self._polling_collector.start() + + # Start dispatch loop + self._dispatch_thread = threading.Thread( + target=self._dispatch_loop, daemon=True, name='notification-dispatch' + ) + self._dispatch_thread.start() + + print(f"[NotificationManager] Started with channels: {list(self._channels.keys())}") + + def stop(self): + """Stop the notification service cleanly.""" + self._running = False + + if self._journal_watcher: + self._journal_watcher.stop() + if self._task_watcher: + self._task_watcher.stop() + if self._polling_collector: + self._polling_collector.stop() + + print("[NotificationManager] Stopped.") + + def _dispatch_loop(self): + """Main dispatch loop: reads queue -> filters -> formats -> sends -> records.""" + last_cleanup = time.monotonic() + last_flush = time.monotonic() + cleanup_interval = 3600 # Cleanup cooldowns every hour + flush_interval = 5 # Flush aggregation buckets every 5s + + while self._running: + try: + event = self._event_queue.get(timeout=2) + except Empty: + # Periodic maintenance during idle + now_mono = time.monotonic() 
+ if now_mono - last_cleanup > cleanup_interval: + self._cleanup_old_cooldowns() + last_cleanup = now_mono + # Flush expired aggregation buckets + if now_mono - last_flush > flush_interval: + self._flush_aggregation() + last_flush = now_mono + continue + + try: + self._process_event(event) + except Exception as e: + print(f"[NotificationManager] Dispatch error: {e}") + + # Also flush aggregation after each event + if time.monotonic() - last_flush > flush_interval: + self._flush_aggregation() + last_flush = time.monotonic() + + def _flush_aggregation(self): + """Flush expired aggregation buckets and dispatch summaries.""" + try: + summaries = self._aggregator.flush_expired() + for summary_event in summaries: + # Burst summaries bypass aggregator but still pass cooldown + rate limit + self._process_event_direct(summary_event) + except Exception as e: + print(f"[NotificationManager] Aggregation flush error: {e}") + + def _process_event(self, event: NotificationEvent): + """Process a single event: filter -> aggregate -> cooldown -> rate limit -> dispatch.""" + if not self._enabled: + return + + # Check if this event's GROUP is enabled in settings. + # The UI saves categories by group key: events.vm_ct, events.backup, etc. + template = TEMPLATES.get(event.event_type, {}) + event_group = template.get('group', 'system') + group_setting = f'events.{event_group}' + if self._config.get(group_setting, 'true') == 'false': + return + + # Check if this SPECIFIC event type is enabled (granular per-event toggle). + # Key format: event.{event_type} = "true"/"false" + # Default comes from the template's default_enabled field. + default_enabled = 'true' if template.get('default_enabled', True) else 'false' + event_specific = f'event.{event.event_type}' + if self._config.get(event_specific, default_enabled) == 'false': + return + + # Check severity filter. + # The UI saves severity_filter as: "all", "warning", "critical". + # Map to our internal severity names for comparison. 
+ severity_map = {'all': 'INFO', 'warning': 'WARNING', 'critical': 'CRITICAL'} + raw_filter = self._config.get('severity_filter', 'all') + min_severity = severity_map.get(raw_filter.lower(), 'INFO') + if not self._meets_severity(event.severity, min_severity): + return + + # Try aggregation (may buffer the event) + result = self._aggregator.ingest(event) + if result is None: + return # Buffered, will be flushed as summary later + event = result # Use original event (first in burst passes through) + + # From here, proceed with dispatch (shared with _process_event_direct) + self._dispatch_event(event) + + def _process_event_direct(self, event: NotificationEvent): + """Process a burst summary event. Bypasses aggregator but applies ALL other filters.""" + if not self._enabled: + return + + # Check group filter (same as _process_event) + template = TEMPLATES.get(event.event_type, {}) + event_group = template.get('group', 'system') + group_setting = f'events.{event_group}' + if self._config.get(group_setting, 'true') == 'false': + return + + # Check per-event filter (same as _process_event) + default_enabled = 'true' if template.get('default_enabled', True) else 'false' + event_specific = f'event.{event.event_type}' + if self._config.get(event_specific, default_enabled) == 'false': + return + + # Check severity filter (same mapping as _process_event) + severity_map = {'all': 'INFO', 'warning': 'WARNING', 'critical': 'CRITICAL'} + raw_filter = self._config.get('severity_filter', 'all') + min_severity = severity_map.get(raw_filter.lower(), 'INFO') + if not self._meets_severity(event.severity, min_severity): + return + + self._dispatch_event(event) + + def _dispatch_event(self, event: NotificationEvent): + """Shared dispatch pipeline: cooldown -> rate limit -> render -> send.""" + # Check cooldown + if not self._check_cooldown(event): + return + + # Check group rate limit + template = TEMPLATES.get(event.event_type, {}) + group = template.get('group', 'system') + if not 
self._group_limiter.allow(group): + return + + # Use the properly mapped severity from the event, not from template defaults. + # event.severity was set by _map_severity which normalises to CRITICAL/WARNING/INFO. + severity = event.severity + + # Inject the canonical severity into data so templates see it too. + event.data['severity'] = severity + + # Render message from template (structured output) + rendered = render_template(event.event_type, event.data) + + # Optional AI enhancement (on text body only) + ai_config = { + 'enabled': self._config.get('ai_enabled', 'false'), + 'provider': self._config.get('ai_provider', ''), + 'api_key': self._config.get('ai_api_key', ''), + 'model': self._config.get('ai_model', ''), + } + body = format_with_ai( + rendered['title'], rendered['body'], severity, ai_config + ) + + # Enrich data with structured fields for channels that support them + enriched_data = dict(event.data) + enriched_data['_rendered_fields'] = rendered.get('fields', []) + enriched_data['_body_html'] = rendered.get('body_html', '') + + # Send through all active channels + self._dispatch_to_channels( + rendered['title'], body, severity, + event.event_type, enriched_data, event.source + ) + + def _dispatch_to_channels(self, title: str, body: str, severity: str, + event_type: str, data: Dict, source: str): + """Send notification through all configured channels.""" + with self._lock: + channels = dict(self._channels) + + for ch_name, channel in channels.items(): + try: + result = channel.send(title, body, severity, data) + self._record_history( + event_type, ch_name, title, body, severity, + result.get('success', False), + result.get('error', ''), + source + ) + + if result.get('success'): + self._stats['total_sent'] += 1 + self._stats['last_sent_at'] = datetime.now().isoformat() + else: + self._stats['total_errors'] += 1 + print(f"[NotificationManager] Send failed ({ch_name}): {result.get('error')}") + + except Exception as e: + self._stats['total_errors'] += 1 + 
self._record_history( + event_type, ch_name, title, body, severity, + False, str(e), source + ) + + # ─── Cooldown / Dedup ─────────────────────────────────────── + + def _check_cooldown(self, event: NotificationEvent) -> bool: + """Check if the event passes cooldown rules.""" + now = time.time() + + # Determine cooldown period + template = TEMPLATES.get(event.event_type, {}) + group = template.get('group', 'system') + + # Priority: per-type config > per-severity > default + cooldown_key = f'cooldown.{event.event_type}' + cooldown_str = self._config.get(cooldown_key) + + if cooldown_str is None: + cooldown_key_group = f'cooldown.{group}' + cooldown_str = self._config.get(cooldown_key_group) + + if cooldown_str is not None: + cooldown = int(cooldown_str) + else: + cooldown = DEFAULT_COOLDOWNS.get(event.severity, 300) + + # CRITICAL events: 60s minimum cooldown (prevents storm, but delivers fast) + if event.severity == 'CRITICAL' and cooldown_str is None: + cooldown = 60 + + # Backup/replication events: each execution is unique and should + # always be delivered. A 10s cooldown prevents exact duplicates + # (webhook + tasks) but allows repeated backup jobs to report. + _ALWAYS_DELIVER = {'backup_complete', 'backup_fail', 'backup_start', + 'replication_complete', 'replication_fail'} + if event.event_type in _ALWAYS_DELIVER and cooldown_str is None: + cooldown = 10 + + # VM/CT state changes are real user actions that should always be + # delivered. Each start/stop/shutdown is a distinct event. A 5s + # cooldown prevents exact duplicates from concurrent watchers. + _STATE_EVENTS = { + 'vm_start', 'vm_stop', 'vm_shutdown', 'vm_restart', + 'ct_start', 'ct_stop', 'ct_shutdown', 'ct_restart', + 'vm_fail', 'ct_fail', + } + if event.event_type in _STATE_EVENTS and cooldown_str is None: + cooldown = 5 + + # System shutdown/reboot must be delivered immediately -- the node + # is going down and there may be only seconds to send the message. 
+ _URGENT_EVENTS = {'system_shutdown', 'system_reboot'} + if event.event_type in _URGENT_EVENTS and cooldown_str is None: + cooldown = 5 + + # Check against last sent time using stable fingerprint + last_sent = self._cooldowns.get(event.fingerprint, 0) + + if now - last_sent < cooldown: + return False + + self._cooldowns[event.fingerprint] = now + self._persist_cooldown(event.fingerprint, now) + return True + + def _load_cooldowns_from_db(self): + """Load persistent cooldown state from SQLite (up to 48h).""" + try: + if not DB_PATH.exists(): + return + conn = sqlite3.connect(str(DB_PATH), timeout=10) + conn.execute('PRAGMA journal_mode=WAL') + cursor = conn.cursor() + cursor.execute('SELECT fingerprint, last_sent_ts FROM notification_last_sent') + now = time.time() + for fp, ts in cursor.fetchall(): + if now - ts < 172800: # 48h window + self._cooldowns[fp] = ts + conn.close() + except Exception as e: + print(f"[NotificationManager] Failed to load cooldowns: {e}") + + def _persist_cooldown(self, fingerprint: str, ts: float): + """Save cooldown timestamp to SQLite for restart persistence.""" + try: + conn = sqlite3.connect(str(DB_PATH), timeout=10) + conn.execute('PRAGMA journal_mode=WAL') + conn.execute('PRAGMA busy_timeout=5000') + conn.execute(''' + INSERT OR REPLACE INTO notification_last_sent (fingerprint, last_sent_ts, count) + VALUES (?, ?, COALESCE( + (SELECT count + 1 FROM notification_last_sent WHERE fingerprint = ?), 1 + )) + ''', (fingerprint, int(ts), fingerprint)) + conn.commit() + conn.close() + except Exception: + pass # Non-critical, in-memory cooldown still works + + def _cleanup_old_cooldowns(self): + """Remove cooldown entries older than 48h from both memory and DB.""" + cutoff = time.time() - 172800 # 48h + self._cooldowns = {k: v for k, v in self._cooldowns.items() if v > cutoff} + try: + conn = sqlite3.connect(str(DB_PATH), timeout=10) + conn.execute('PRAGMA journal_mode=WAL') + conn.execute('DELETE FROM notification_last_sent WHERE 
last_sent_ts < ?', (int(cutoff),)) + conn.commit() + conn.close() + except Exception: + pass + + @staticmethod + def _meets_severity(event_severity: str, min_severity: str) -> bool: + """Check if event severity meets the minimum threshold.""" + levels = {'INFO': 0, 'WARNING': 1, 'CRITICAL': 2} + return levels.get(event_severity, 0) >= levels.get(min_severity, 0) + + # ─── History Recording ────────────────────────────────────── + + def _record_history(self, event_type: str, channel: str, title: str, + message: str, severity: str, success: bool, + error_message: str, source: str): + """Record a notification attempt in the history table.""" + try: + conn = sqlite3.connect(str(DB_PATH), timeout=10) + conn.execute('PRAGMA journal_mode=WAL') + conn.execute('PRAGMA busy_timeout=5000') + cursor = conn.cursor() + cursor.execute(''' + INSERT INTO notification_history + (event_type, channel, title, message, severity, sent_at, success, error_message, source) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + ''', ( + event_type, channel, title, message[:500], severity, + datetime.now().isoformat(), 1 if success else 0, + error_message[:500] if error_message else None, source + )) + conn.commit() + conn.close() + except Exception as e: + print(f"[NotificationManager] History record error: {e}") + + # ─── Public API (used by Flask routes and CLI) ────────────── + + def send_notification(self, event_type: str, severity: str, + title: str, message: str, + data: Optional[Dict] = None, + source: str = 'api') -> Dict[str, Any]: + """Send a notification directly (bypasses queue and cooldown). + + Used by CLI and API for explicit sends. 
+ """ + if not self._channels: + self._load_config() + + if not self._channels: + return { + 'success': False, + 'error': 'No channels configured or enabled', + 'channels_sent': [], + } + + # Render template if available + if event_type in TEMPLATES and not message: + rendered = render_template(event_type, data or {}) + title = title or rendered['title'] + message = rendered['body'] + severity = severity or rendered['severity'] + + # AI enhancement + ai_config = { + 'enabled': self._config.get('ai_enabled', 'false'), + 'provider': self._config.get('ai_provider', ''), + 'api_key': self._config.get('ai_api_key', ''), + 'model': self._config.get('ai_model', ''), + } + message = format_with_ai(title, message, severity, ai_config) + + results = {} + channels_sent = [] + errors = [] + + with self._lock: + channels = dict(self._channels) + + for ch_name, channel in channels.items(): + try: + result = channel.send(title, message, severity, data) + results[ch_name] = result + + self._record_history( + event_type, ch_name, title, message, severity, + result.get('success', False), + result.get('error', ''), + source + ) + + if result.get('success'): + channels_sent.append(ch_name) + else: + errors.append(f"{ch_name}: {result.get('error')}") + except Exception as e: + errors.append(f"{ch_name}: {str(e)}") + + return { + 'success': len(channels_sent) > 0, + 'channels_sent': channels_sent, + 'errors': errors, + 'total_channels': len(channels), + } + + def send_raw(self, title: str, message: str, + severity: str = 'INFO', + source: str = 'api') -> Dict[str, Any]: + """Send a raw message without template (for custom scripts).""" + return self.send_notification( + 'custom', severity, title, message, source=source + ) + + def test_channel(self, channel_name: str = 'all') -> Dict[str, Any]: + """Test one or all configured channels.""" + if not self._channels: + self._load_config() + + if not self._channels: + return {'success': False, 'error': 'No channels configured'} + + results = 
{} + + if channel_name == 'all': + targets = dict(self._channels) + elif channel_name in self._channels: + targets = {channel_name: self._channels[channel_name]} + else: + # Try to create channel from config even if not enabled + ch_config = {} + for config_key in CHANNEL_TYPES.get(channel_name, {}).get('config_keys', []): + ch_config[config_key] = self._config.get(f'{channel_name}.{config_key}', '') + + channel = create_channel(channel_name, ch_config) + if channel: + targets = {channel_name: channel} + else: + return {'success': False, 'error': f'Channel {channel_name} not configured'} + + for ch_name, channel in targets.items(): + success, error = channel.test() + results[ch_name] = {'success': success, 'error': error} + + self._record_history( + 'test', ch_name, 'ProxMenux Test', + 'Test notification', 'INFO', + success, error, 'api' + ) + + overall_success = any(r['success'] for r in results.values()) + return { + 'success': overall_success, + 'results': results, + } + + # ─── Proxmox Webhook ────────────────────────────────────────── + + def process_webhook(self, payload: dict) -> dict: + """Process incoming Proxmox webhook. 
Delegates to ProxmoxHookWatcher.""" + if not self._hook_watcher: + self._hook_watcher = ProxmoxHookWatcher(self._event_queue) + return self._hook_watcher.process_webhook(payload) + + def get_webhook_secret(self) -> str: + """Get configured webhook secret, or empty string if none.""" + if not self._config: + self._load_config() + return self._config.get('webhook_secret', '') + + def get_webhook_allowed_ips(self) -> list: + """Get list of allowed IPs for webhook, or empty list (allow all).""" + if not self._config: + self._load_config() + raw = self._config.get('webhook_allowed_ips', '') + if not raw: + return [] + return [ip.strip() for ip in str(raw).split(',') if ip.strip()] + + # ─── Status & Settings ────────────────────────────────────── + + def get_status(self) -> Dict[str, Any]: + """Get current service status.""" + if not self._config: + self._load_config() + + return { + 'enabled': self._enabled, + 'running': self._running, + 'channels': { + name: { + 'type': name, + 'connected': True, + } + for name in self._channels + }, + 'stats': self._stats, + 'watchers': { + 'journal': self._journal_watcher is not None and self._running, + 'task': self._task_watcher is not None and self._running, + 'polling': self._polling_collector is not None and self._running, + }, + } + + def set_enabled(self, enabled: bool) -> Dict[str, Any]: + """Enable or disable the notification service.""" + self._save_setting('enabled', 'true' if enabled else 'false') + self._enabled = enabled + + if enabled and not self._running: + self.start() + elif not enabled and self._running: + self.stop() + + return {'success': True, 'enabled': enabled} + + def list_channels(self) -> Dict[str, Any]: + """List all channel types with their configuration status.""" + if not self._config: + self._load_config() + + channels_info = {} + for ch_type, info in CHANNEL_TYPES.items(): + enabled = self._config.get(f'{ch_type}.enabled', 'false') == 'true' + configured = all( + 
bool(self._config.get(f'{ch_type}.{k}', '')) + for k in info['config_keys'] + ) + channels_info[ch_type] = { + 'name': info['name'], + 'enabled': enabled, + 'configured': configured, + 'active': ch_type in self._channels, + } + + return {'channels': channels_info} + + def get_history(self, limit: int = 50, offset: int = 0, + severity: str = '', channel: str = '') -> Dict[str, Any]: + """Get notification history with optional filters.""" + try: + conn = sqlite3.connect(str(DB_PATH), timeout=10) + conn.execute('PRAGMA journal_mode=WAL') + conn.execute('PRAGMA busy_timeout=5000') + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + + query = 'SELECT * FROM notification_history WHERE 1=1' + params: list = [] + + if severity: + query += ' AND severity = ?' + params.append(severity) + if channel: + query += ' AND channel = ?' + params.append(channel) + + query += ' ORDER BY sent_at DESC LIMIT ? OFFSET ?' + params.extend([limit, offset]) + + cursor.execute(query, params) + rows = [dict(row) for row in cursor.fetchall()] + + # Get total count + count_query = 'SELECT COUNT(*) FROM notification_history WHERE 1=1' + count_params: list = [] + if severity: + count_query += ' AND severity = ?' + count_params.append(severity) + if channel: + count_query += ' AND channel = ?' 
+ count_params.append(channel) + + cursor.execute(count_query, count_params) + total = cursor.fetchone()[0] + + conn.close() + + return { + 'history': rows, + 'total': total, + 'limit': limit, + 'offset': offset, + } + except Exception as e: + return {'history': [], 'total': 0, 'error': str(e)} + + def clear_history(self) -> Dict[str, Any]: + """Clear all notification history.""" + try: + conn = sqlite3.connect(str(DB_PATH), timeout=10) + conn.execute('PRAGMA journal_mode=WAL') + conn.execute('PRAGMA busy_timeout=5000') + conn.execute('DELETE FROM notification_history') + conn.commit() + conn.close() + return {'success': True} + except Exception as e: + return {'success': False, 'error': str(e)} + + def get_settings(self) -> Dict[str, Any]: + """Get all notification settings for the UI. + + Returns a structure matching the frontend's NotificationConfig shape + so the round-trip (GET -> edit -> POST) is seamless. + """ + if not self._config: + self._load_config() + + # Build nested channels object matching frontend ChannelConfig + channels = {} + for ch_type, info in CHANNEL_TYPES.items(): + ch_cfg: Dict[str, Any] = { + 'enabled': self._config.get(f'{ch_type}.enabled', 'false') == 'true', + } + for config_key in info['config_keys']: + ch_cfg[config_key] = self._config.get(f'{ch_type}.{config_key}', '') + channels[ch_type] = ch_cfg + + # Build event_categories dict (group-level toggle) + # EVENT_GROUPS is a dict: { 'system': {...}, 'vm_ct': {...}, ... } + event_categories = {} + for group_key in EVENT_GROUPS: + event_categories[group_key] = self._config.get(f'events.{group_key}', 'true') == 'true' + + # Build per-event toggles: { 'vm_start': true, 'vm_stop': false, ... 
} + event_toggles = {} + for event_type, tmpl in TEMPLATES.items(): + default = tmpl.get('default_enabled', True) + saved = self._config.get(f'event.{event_type}', None) + if saved is not None: + event_toggles[event_type] = saved == 'true' + else: + event_toggles[event_type] = default + + # Build event_types_by_group for UI rendering + event_types_by_group = get_event_types_by_group() + + config = { + 'enabled': self._enabled, + 'channels': channels, + 'severity_filter': self._config.get('severity_filter', 'all'), + 'event_categories': event_categories, + 'event_toggles': event_toggles, + 'event_types_by_group': event_types_by_group, + 'ai_enabled': self._config.get('ai_enabled', 'false') == 'true', + 'ai_provider': self._config.get('ai_provider', 'openai'), + 'ai_api_key': self._config.get('ai_api_key', ''), + 'ai_model': self._config.get('ai_model', ''), + 'hostname': self._config.get('hostname', ''), + 'webhook_secret': self._config.get('webhook_secret', ''), + 'webhook_allowed_ips': self._config.get('webhook_allowed_ips', ''), + 'pbs_host': self._config.get('pbs_host', ''), + 'pve_host': self._config.get('pve_host', ''), + 'pbs_trusted_sources': self._config.get('pbs_trusted_sources', ''), + } + + return { + 'success': True, + 'config': config, + } + + def save_settings(self, settings: Dict[str, str]) -> Dict[str, Any]: + """Save multiple notification settings at once.""" + try: + conn = sqlite3.connect(str(DB_PATH), timeout=10) + conn.execute('PRAGMA journal_mode=WAL') + conn.execute('PRAGMA busy_timeout=5000') + cursor = conn.cursor() + now = datetime.now().isoformat() + + for key, value in settings.items(): + # Accept both prefixed and unprefixed keys + full_key = key if key.startswith(SETTINGS_PREFIX) else f'{SETTINGS_PREFIX}{key}' + short_key = full_key[len(SETTINGS_PREFIX):] + + cursor.execute(''' + INSERT OR REPLACE INTO user_settings (setting_key, setting_value, updated_at) + VALUES (?, ?, ?) 
+ ''', (full_key, str(value), now)) + + self._config[short_key] = str(value) + + # If user is explicitly enabling an event that defaults to disabled, + # mark it so _load_config reconciliation won't override it later. + if short_key.startswith('event.') and str(value) == 'true': + event_type = short_key[6:] # strip 'event.' + tmpl = TEMPLATES.get(event_type, {}) + if not tmpl.get('default_enabled', True): + marker_key = f'{SETTINGS_PREFIX}event_explicit.{event_type}' + cursor.execute(''' + INSERT OR REPLACE INTO user_settings (setting_key, setting_value, updated_at) + VALUES (?, ?, ?) + ''', (marker_key, 'true', now)) + self._config[f'event_explicit.{event_type}'] = 'true' + + conn.commit() + conn.close() + + # Rebuild channels with new config + was_enabled = self._enabled + self._enabled = self._config.get('enabled', 'false') == 'true' + self._rebuild_channels() + + # Start/stop service and auto-configure PVE webhook + pve_webhook_result = None + if self._enabled and not was_enabled: + # Notifications just got ENABLED -> start service + setup PVE webhook + if not self._running: + self.start() + try: + from flask_notification_routes import setup_pve_webhook_core + pve_webhook_result = setup_pve_webhook_core() + except ImportError: + pass # flask_notification_routes not available (CLI mode) + except Exception as e: + pve_webhook_result = {'configured': False, 'error': str(e)} + elif not self._enabled and was_enabled: + # Notifications just got DISABLED -> stop service + cleanup PVE webhook + if self._running: + self.stop() + try: + from flask_notification_routes import cleanup_pve_webhook_core + cleanup_pve_webhook_core() + except ImportError: + pass + except Exception: + pass + + result = {'success': True, 'channels_active': list(self._channels.keys())} + if pve_webhook_result: + result['pve_webhook'] = pve_webhook_result + return result + except Exception as e: + return {'success': False, 'error': str(e)} + + +# ─── Singleton (for server mode) 
───────────────────────────────── + +notification_manager = NotificationManager() + + +# ─── CLI Interface ──────────────────────────────────────────────── + +def _print_result(result: Dict, as_json: bool): + """Print CLI result in human-readable or JSON format.""" + if as_json: + print(json.dumps(result, indent=2, default=str)) + return + + if result.get('success'): + print(f"OK: ", end='') + elif 'success' in result and not result['success']: + print(f"ERROR: ", end='') + + # Format based on content + if 'channels_sent' in result: + sent = result.get('channels_sent', []) + print(f"Sent via: {', '.join(sent) if sent else 'none'}") + if result.get('errors'): + for err in result['errors']: + print(f" Error: {err}") + elif 'results' in result: + for ch, r in result['results'].items(): + status = 'OK' if r['success'] else f"FAILED: {r['error']}" + print(f" {ch}: {status}") + elif 'channels' in result: + for ch, info in result['channels'].items(): + status = 'active' if info.get('active') else ('configured' if info.get('configured') else 'not configured') + enabled = 'enabled' if info.get('enabled') else 'disabled' + print(f" {info['name']}: {enabled}, {status}") + elif 'enabled' in result and 'running' in result: + print(f"Enabled: {result['enabled']}, Running: {result['running']}") + if result.get('stats'): + stats = result['stats'] + print(f" Total sent: {stats.get('total_sent', 0)}") + print(f" Total errors: {stats.get('total_errors', 0)}") + if stats.get('last_sent_at'): + print(f" Last sent: {stats['last_sent_at']}") + elif 'enabled' in result: + print(f"Service {'enabled' if result['enabled'] else 'disabled'}") + else: + print(json.dumps(result, indent=2, default=str)) + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser( + description='ProxMenux Notification Manager CLI', + epilog='Example: python3 notification_manager.py --action send --type vm_fail --severity CRITICAL --title "VM 100 failed" --message "QEMU process crashed"' 
+ ) + parser.add_argument('--action', required=True, + choices=['send', 'send-raw', 'test', 'status', + 'enable', 'disable', 'list-channels'], + help='Action to perform') + parser.add_argument('--type', help='Event type for send action (e.g. vm_fail, backup_complete)') + parser.add_argument('--severity', default='INFO', + choices=['INFO', 'WARNING', 'CRITICAL'], + help='Notification severity (default: INFO)') + parser.add_argument('--title', help='Notification title') + parser.add_argument('--message', help='Notification message body') + parser.add_argument('--channel', default='all', + help='Specific channel for test (default: all)') + parser.add_argument('--json', action='store_true', + help='Output result as JSON') + + args = parser.parse_args() + + mgr = NotificationManager() + mgr._load_config() + + if args.action == 'send': + if not args.type: + parser.error('--type is required for send action') + result = mgr.send_notification( + args.type, args.severity, + args.title or '', args.message or '', + data={ + 'hostname': socket.gethostname().split('.')[0], + 'reason': args.message or '', + }, + source='cli' + ) + + elif args.action == 'send-raw': + if not args.title or not args.message: + parser.error('--title and --message are required for send-raw') + result = mgr.send_raw(args.title, args.message, args.severity, source='cli') + + elif args.action == 'test': + result = mgr.test_channel(args.channel) + + elif args.action == 'status': + result = mgr.get_status() + + elif args.action == 'enable': + result = mgr.set_enabled(True) + + elif args.action == 'disable': + result = mgr.set_enabled(False) + + elif args.action == 'list-channels': + result = mgr.list_channels() + + else: + result = {'error': f'Unknown action: {args.action}'} + + _print_result(result, args.json) + + # Exit with appropriate code + sys.exit(0 if result.get('success', True) else 1) diff --git a/AppImage/scripts/notification_templates.py b/AppImage/scripts/notification_templates.py new file mode 
100644 index 00000000..55371f45 --- /dev/null +++ b/AppImage/scripts/notification_templates.py @@ -0,0 +1,958 @@ +""" +ProxMenux Notification Templates +Message templates for all event types with per-channel formatting. + +Templates use Python str.format() variables: + {hostname}, {severity}, {category}, {reason}, {summary}, + {previous}, {current}, {vmid}, {vmname}, {timestamp}, etc. + +Optional AI enhancement enriches messages with context/suggestions. + +Author: MacRimi +""" + +import json +import re +import socket +import time +import urllib.request +import urllib.error +from typing import Dict, Any, Optional, List + + +# ─── vzdump message parser ─────────────────────────────────────── + +def _parse_vzdump_message(message: str) -> Optional[Dict[str, Any]]: + """Parse a PVE vzdump notification message into structured data. + + Supports two formats: + 1. Local storage: table with columns VMID Name Status Time Size Filename + 2. PBS storage: log-style output with 'Finished Backup of VM NNN (HH:MM:SS)' + and sizes in lines like 'root.pxar: had to backup X of Y' or 'transferred X' + + Returns dict with 'vms' list, 'total_time', 'total_size', or None. + """ + if not message: + return None + + vms: List[Dict[str, str]] = [] + total_time = '' + total_size = '' + + lines = message.split('\n') + + # ── Strategy 1: classic table (local/NFS/CIFS storage) ── + header_idx = -1 + for i, line in enumerate(lines): + if re.match(r'\s*VMID\s+Name\s+Status', line, re.IGNORECASE): + header_idx = i + break + + if header_idx >= 0: + # Use column positions from the header to slice each row. 
+ # Header: "VMID Name Status Time Size Filename" + header = lines[header_idx] + col_starts = [] + for col_name in ['VMID', 'Name', 'Status', 'Time', 'Size', 'Filename']: + idx = header.find(col_name) + if idx >= 0: + col_starts.append(idx) + + if len(col_starts) == 6: + for line in lines[header_idx + 1:]: + stripped = line.strip() + if not stripped or stripped.startswith('Total') or stripped.startswith('Logs') or stripped.startswith('='): + break + # Pad line to avoid index errors + padded = line.ljust(col_starts[-1] + 50) + vmid = padded[col_starts[0]:col_starts[1]].strip() + name = padded[col_starts[1]:col_starts[2]].strip() + status = padded[col_starts[2]:col_starts[3]].strip() + time_val = padded[col_starts[3]:col_starts[4]].strip() + size = padded[col_starts[4]:col_starts[5]].strip() + filename = padded[col_starts[5]:].strip() + + if vmid and vmid.isdigit(): + vms.append({ + 'vmid': vmid, + 'name': name, + 'status': status, + 'time': time_val, + 'size': size, + 'filename': filename, + }) + + # ── Strategy 2: log-style (PBS / Proxmox Backup Server) ── + # Parse from the full vzdump log lines. + # Look for patterns: + # "Starting Backup of VM NNN (lxc/qemu)" -> detect guest + # "CT Name: xxx" or "VM Name: xxx" -> guest name + # "Finished Backup of VM NNN (HH:MM:SS)" -> duration + status=ok + # "root.pxar: had to backup X of Y" -> size (CT) + # "transferred X in N seconds" -> size (QEMU) + # "creating ... 
archive 'ct/100/2026-..'" -> archive name for PBS + # "TASK ERROR:" or "ERROR:" -> status=error + if not vms: + current_vm: Optional[Dict[str, str]] = None + + for line in lines: + # Remove "INFO: " prefix that PVE adds + clean = re.sub(r'^(?:INFO|WARNING|ERROR):\s*', '', line.strip()) + + # Start of a new VM backup + m_start = re.match( + r'Starting Backup of VM (\d+)\s+\((lxc|qemu)\)', clean) + if m_start: + if current_vm: + vms.append(current_vm) + current_vm = { + 'vmid': m_start.group(1), + 'name': '', + 'status': 'ok', + 'time': '', + 'size': '', + 'filename': '', + 'type': m_start.group(2), + } + continue + + if current_vm: + # Guest name + m_name = re.match(r'(?:CT|VM) Name:\s*(.+)', clean) + if m_name: + current_vm['name'] = m_name.group(1).strip() + continue + + # PBS archive path -> extract as filename + m_archive = re.search( + r"creating .+ archive '([^']+)'", clean) + if m_archive: + current_vm['filename'] = m_archive.group(1) + continue + + # Size for containers (pxar) + m_pxar = re.search( + r'root\.pxar:.*?of\s+([\d.]+\s+\S+)', clean) + if m_pxar: + current_vm['size'] = m_pxar.group(1) + continue + + # Size for QEMU (transferred) + m_transfer = re.search( + r'transferred\s+([\d.]+\s+\S+)', clean) + if m_transfer: + current_vm['size'] = m_transfer.group(1) + continue + + # Finished -> duration + m_finish = re.match( + r'Finished Backup of VM (\d+)\s+\(([^)]+)\)', clean) + if m_finish: + current_vm['time'] = m_finish.group(2) + current_vm['status'] = 'ok' + vms.append(current_vm) + current_vm = None + continue + + # Error + if clean.startswith('ERROR:') or clean.startswith('TASK ERROR'): + if current_vm: + current_vm['status'] = 'error' + + # Don't forget the last VM if it wasn't finished + if current_vm: + vms.append(current_vm) + + # ── Extract totals ── + for line in lines: + m_time = re.search(r'Total running time:\s*(.+)', line) + if m_time: + total_time = m_time.group(1).strip() + m_size = re.search(r'Total size:\s*(.+)', line) + if m_size: + 
total_size = m_size.group(1).strip() + + # For PBS: calculate total size if not explicitly stated + if not total_size and vms: + # Sum individual sizes if they share units + sizes_gib = 0.0 + for vm in vms: + s = vm.get('size', '') + m = re.match(r'([\d.]+)\s+(.*)', s) + if m: + val = float(m.group(1)) + unit = m.group(2).strip().upper() + if 'GIB' in unit or 'GB' in unit: + sizes_gib += val + elif 'MIB' in unit or 'MB' in unit: + sizes_gib += val / 1024 + elif 'TIB' in unit or 'TB' in unit: + sizes_gib += val * 1024 + if sizes_gib > 0: + if sizes_gib >= 1024: + total_size = f"{sizes_gib / 1024:.3f} TiB" + elif sizes_gib >= 1: + total_size = f"{sizes_gib:.3f} GiB" + else: + total_size = f"{sizes_gib * 1024:.3f} MiB" + + # For PBS: calculate total time if not stated + if not total_time and vms: + total_secs = 0 + for vm in vms: + t = vm.get('time', '') + # Parse HH:MM:SS format + m = re.match(r'(\d+):(\d+):(\d+)', t) + if m: + total_secs += int(m.group(1)) * 3600 + int(m.group(2)) * 60 + int(m.group(3)) + if total_secs > 0: + hours = total_secs // 3600 + mins = (total_secs % 3600) // 60 + secs = total_secs % 60 + if hours: + total_time = f"{hours}h {mins}m {secs}s" + elif mins: + total_time = f"{mins}m {secs}s" + else: + total_time = f"{secs}s" + + if not vms and not total_size: + return None + + return { + 'vms': vms, + 'total_time': total_time, + 'total_size': total_size, + 'vm_count': len(vms), + } + + +def _format_vzdump_body(parsed: Dict[str, Any], is_success: bool) -> str: + """Format parsed vzdump data into a clean Telegram-friendly message.""" + parts = [] + + for vm in parsed.get('vms', []): + status = vm.get('status', '').lower() + icon = '\u2705' if status == 'ok' else '\u274C' + + parts.append(f"{icon} ID {vm['vmid']} ({vm['name']})") + + details = [] + if vm.get('size'): + details.append(f"Size: {vm['size']}") + if vm.get('time'): + details.append(f"Duration: {vm['time']}") + if vm.get('filename'): + fname = vm['filename'] + # PBS archives look like 
"ct/100/2026-..." or "vm/105/2026-..." + if re.match(r'^(?:ct|vm)/\d+/', fname): + details.append(f"PBS: {fname}") + else: + details.append(f"File: {fname}") + if details: + parts.append(' | '.join(details)) + parts.append('') # blank line between VMs + + # Summary + vm_count = parsed.get('vm_count', 0) + if vm_count > 0 or parsed.get('total_size'): + ok_count = sum(1 for v in parsed.get('vms', []) + if v.get('status', '').lower() == 'ok') + fail_count = vm_count - ok_count + + summary_parts = [] + if vm_count: + summary_parts.append(f"{vm_count} backup(s)") + if fail_count: + summary_parts.append(f"{fail_count} failed") + if parsed.get('total_size'): + summary_parts.append(f"Total: {parsed['total_size']}") + if parsed.get('total_time'): + summary_parts.append(f"Time: {parsed['total_time']}") + + if summary_parts: + parts.append('--- ' + ' | '.join(summary_parts)) + + return '\n'.join(parts) + + +# ─── Severity Icons ────────────────────────────────────────────── + +SEVERITY_ICONS = { + 'CRITICAL': '\U0001F534', + 'WARNING': '\U0001F7E1', + 'INFO': '\U0001F535', + 'OK': '\U0001F7E2', + 'UNKNOWN': '\u26AA', +} + +SEVERITY_ICONS_DISCORD = { + 'CRITICAL': ':red_circle:', + 'WARNING': ':yellow_circle:', + 'INFO': ':blue_circle:', + 'OK': ':green_circle:', + 'UNKNOWN': ':white_circle:', +} + + +# ─── Event Templates ───────────────────────────────────────────── +# Each template has a 'title' and 'body' with {variable} placeholders. +# 'group' is used for UI event filter grouping. +# 'default_enabled' controls initial state in settings. + +TEMPLATES = { + # ── Health Monitor state changes ── + # NOTE: state_change is disabled by default -- it fires on every + # status oscillation (OK->WARNING->OK) which creates noise. + # The health_persistent and new_error templates cover this better. 
+ 'state_change': { + 'title': '{hostname}: {category} changed to {current}', + 'body': '{category} status changed from {previous} to {current}.\n{reason}', + 'group': 'system', + 'default_enabled': False, + }, + 'new_error': { + 'title': '{hostname}: New {severity} - {category}', + 'body': '{reason}', + 'group': 'system', + 'default_enabled': True, + }, + 'error_resolved': { + 'title': '{hostname}: Resolved - {category}', + 'body': '{reason}\nDuration: {duration}', + 'group': 'system', + 'default_enabled': True, + }, + 'error_escalated': { + 'title': '{hostname}: Escalated to {severity} - {category}', + 'body': '{reason}', + 'group': 'system', + 'default_enabled': True, + }, + + # ── VM / CT events ── + 'vm_start': { + 'title': '{hostname}: VM {vmid} started', + 'body': '{vmname} ({vmid}) has been started.', + 'group': 'vm_ct', + 'default_enabled': True, + }, + 'vm_stop': { + 'title': '{hostname}: VM {vmid} stopped', + 'body': '{vmname} ({vmid}) has been stopped.', + 'group': 'vm_ct', + 'default_enabled': False, + }, + 'vm_shutdown': { + 'title': '{hostname}: VM {vmid} shutdown', + 'body': '{vmname} ({vmid}) has been shut down.', + 'group': 'vm_ct', + 'default_enabled': False, + }, + 'vm_fail': { + 'title': '{hostname}: VM {vmid} FAILED', + 'body': '{vmname} ({vmid}) has failed.\n{reason}', + 'group': 'vm_ct', + 'default_enabled': True, + }, + 'vm_restart': { + 'title': '{hostname}: VM {vmid} restarted', + 'body': '{vmname} ({vmid}) has been restarted.', + 'group': 'vm_ct', + 'default_enabled': False, + }, + 'ct_start': { + 'title': '{hostname}: CT {vmid} started', + 'body': '{vmname} ({vmid}) has been started.', + 'group': 'vm_ct', + 'default_enabled': True, + }, + 'ct_stop': { + 'title': '{hostname}: CT {vmid} stopped', + 'body': '{vmname} ({vmid}) has been stopped.', + 'group': 'vm_ct', + 'default_enabled': False, + }, + 'ct_shutdown': { + 'title': '{hostname}: CT {vmid} shutdown', + 'body': '{vmname} ({vmid}) has been shut down.', + 'group': 'vm_ct', + 
'default_enabled': False, + }, + 'ct_restart': { + 'title': '{hostname}: CT {vmid} restarted', + 'body': '{vmname} ({vmid}) has been restarted.', + 'group': 'vm_ct', + 'default_enabled': False, + }, + 'ct_fail': { + 'title': '{hostname}: CT {vmid} FAILED', + 'body': '{vmname} ({vmid}) has failed.\n{reason}', + 'group': 'vm_ct', + 'default_enabled': True, + }, + 'migration_start': { + 'title': '{hostname}: Migration started - {vmid}', + 'body': '{vmname} ({vmid}) migration to {target_node} started.', + 'group': 'vm_ct', + 'default_enabled': True, + }, + 'migration_complete': { + 'title': '{hostname}: Migration complete - {vmid}', + 'body': '{vmname} ({vmid}) migrated successfully to {target_node}.', + 'group': 'vm_ct', + 'default_enabled': True, + }, + 'migration_fail': { + 'title': '{hostname}: Migration FAILED - {vmid}', + 'body': '{vmname} ({vmid}) migration to {target_node} failed.\n{reason}', + 'group': 'vm_ct', + 'default_enabled': True, + }, + 'replication_fail': { + 'title': '{hostname}: Replication FAILED - {vmid}', + 'body': 'Replication of {vmname} ({vmid}) has failed.\n{reason}', + 'group': 'vm_ct', + 'default_enabled': True, + }, + 'replication_complete': { + 'title': '{hostname}: Replication complete - {vmid}', + 'body': 'Replication of {vmname} ({vmid}) completed successfully.', + 'group': 'vm_ct', + 'default_enabled': False, + }, + + # ── Backup / Snapshot events ── + 'backup_start': { + 'title': '{hostname}: Backup started - {vmid}', + 'body': 'Backup of {vmname} ({vmid}) has started.', + 'group': 'backup', + 'default_enabled': False, + }, + 'backup_complete': { + 'title': '{hostname}: Backup complete - {vmid}', + 'body': 'Backup of {vmname} ({vmid}) completed successfully.\nSize: {size}', + 'group': 'backup', + 'default_enabled': True, + }, + 'backup_fail': { + 'title': '{hostname}: Backup FAILED - {vmid}', + 'body': 'Backup of {vmname} ({vmid}) has failed.\n{reason}', + 'group': 'backup', + 'default_enabled': True, + }, + 'snapshot_complete': { + 
'title': '{hostname}: Snapshot created - {vmid}', + 'body': 'Snapshot of {vmname} ({vmid}) created: {snapshot_name}', + 'group': 'backup', + 'default_enabled': False, + }, + 'snapshot_fail': { + 'title': '{hostname}: Snapshot FAILED - {vmid}', + 'body': 'Snapshot of {vmname} ({vmid}) failed.\n{reason}', + 'group': 'backup', + 'default_enabled': True, + }, + + # ── Resource events (from Health Monitor) ── + 'cpu_high': { + 'title': '{hostname}: High CPU usage ({value}%)', + 'body': 'CPU usage is at {value}% on {cores} cores.\n{details}', + 'group': 'resources', + 'default_enabled': True, + }, + 'ram_high': { + 'title': '{hostname}: High memory usage ({value}%)', + 'body': 'Memory usage: {used} / {total} ({value}%).\n{details}', + 'group': 'resources', + 'default_enabled': True, + }, + 'temp_high': { + 'title': '{hostname}: High temperature ({value}C)', + 'body': 'CPU temperature: {value}C (threshold: {threshold}C).\n{details}', + 'group': 'resources', + 'default_enabled': True, + }, + 'disk_space_low': { + 'title': '{hostname}: Low disk space on {mount}', + 'body': '{mount}: {used}% used ({available} available).', + 'group': 'storage', + 'default_enabled': True, + }, + 'disk_io_error': { + 'title': '{hostname}: Disk I/O error', + 'body': '{reason}', + 'group': 'storage', + 'default_enabled': True, + }, + 'storage_unavailable': { + 'title': '{hostname}: Storage unavailable - {storage_name}', + 'body': 'PVE storage "{storage_name}" ({storage_type}) is not available.\n{reason}', + 'group': 'storage', + 'default_enabled': True, + }, + 'load_high': { + 'title': '{hostname}: High system load ({value})', + 'body': 'System load average: {value} on {cores} cores.\n{details}', + 'group': 'resources', + 'default_enabled': True, + }, + + # ── Network events ── + 'network_down': { + 'title': '{hostname}: Network connectivity lost', + 'body': 'Network connectivity check failed.\n{reason}', + 'group': 'network', + 'default_enabled': True, + }, + 'network_latency': { + 'title': 
'{hostname}: High network latency ({value}ms)', + 'body': 'Latency to gateway: {value}ms (threshold: {threshold}ms).', + 'group': 'network', + 'default_enabled': False, + }, + + # ── Security events ── + 'auth_fail': { + 'title': '{hostname}: Authentication failure', + 'body': 'Failed login attempt from {source_ip}.\nUser: {username}\nService: {service}', + 'group': 'security', + 'default_enabled': True, + }, + 'ip_block': { + 'title': '{hostname}: IP blocked by Fail2Ban', + 'body': 'IP {source_ip} has been banned.\nJail: {jail}\nFailures: {failures}', + 'group': 'security', + 'default_enabled': True, + }, + 'firewall_issue': { + 'title': '{hostname}: Firewall issue detected', + 'body': '{reason}', + 'group': 'security', + 'default_enabled': True, + }, + 'user_permission_change': { + 'title': '{hostname}: User permission changed', + 'body': 'User: {username}\nChange: {change_details}', + 'group': 'security', + 'default_enabled': True, + }, + + # ── Cluster events ── + 'split_brain': { + 'title': '{hostname}: SPLIT-BRAIN detected', + 'body': 'Cluster split-brain condition detected.\nQuorum status: {quorum}', + 'group': 'cluster', + 'default_enabled': True, + }, + 'node_disconnect': { + 'title': '{hostname}: Node disconnected', + 'body': 'Node {node_name} has disconnected from the cluster.', + 'group': 'cluster', + 'default_enabled': True, + }, + 'node_reconnect': { + 'title': '{hostname}: Node reconnected', + 'body': 'Node {node_name} has reconnected to the cluster.', + 'group': 'cluster', + 'default_enabled': True, + }, + + # ── System events ── + 'system_shutdown': { + 'title': '{hostname}: System shutting down', + 'body': 'The system is shutting down.\n{reason}', + 'group': 'system', + 'default_enabled': True, + }, + 'system_reboot': { + 'title': '{hostname}: System rebooting', + 'body': 'The system is rebooting.\n{reason}', + 'group': 'system', + 'default_enabled': True, + }, + 'system_problem': { + 'title': '{hostname}: System problem detected', + 'body': 
'{reason}', + 'group': 'system', + 'default_enabled': True, + }, + 'service_fail': { + 'title': '{hostname}: Service failed - {service_name}', + 'body': '{reason}', + 'group': 'system', + 'default_enabled': True, + }, + 'update_available': { + 'title': '{hostname}: Updates available ({count})', + 'body': '{count} package updates are available.\n{details}', + 'group': 'system', + 'default_enabled': False, + }, + 'update_complete': { + 'title': '{hostname}: Update completed', + 'body': '{details}', + 'group': 'system', + 'default_enabled': False, + }, + + # ── Unknown persistent (from health monitor) ── + 'unknown_persistent': { + 'title': '{hostname}: Check unavailable - {category}', + 'body': 'Health check for {category} has been unavailable for 3+ cycles.\n{reason}', + 'group': 'system', + 'default_enabled': False, + }, + + # ── Persistent Health Issues (daily digest) ── + 'health_persistent': { + 'title': '{hostname}: {count} active health issue(s)', + 'body': 'The following health issues remain active:\n{issue_list}\n\nThis digest is sent once every 24 hours while issues persist.', + 'group': 'system', + 'default_enabled': True, + }, + 'health_issue_new': { + 'title': '{hostname}: New health issue - {category}', + 'body': 'New {severity} issue detected:\n{reason}', + 'group': 'system', + 'default_enabled': True, + }, + 'health_issue_resolved': { + 'title': '{hostname}: Resolved - {category}', + 'body': '{category} issue has been resolved.\n{reason}\nDuration: {duration}', + 'group': 'system', + 'default_enabled': True, + }, + + # ── Update notifications (enriched) ── + 'update_summary': { + 'title': '{hostname}: {total_count} updates available', + 'body': '{security_count} security update(s), {total_count} total.\n{package_list}', + 'group': 'system', + 'default_enabled': True, + }, + 'pve_update': { + 'title': '{hostname}: PVE update available ({version})', + 'body': 'Proxmox VE update available: {version}\n{details}', + 'group': 'system', + 'default_enabled': 
True, + }, + + # ── PVE webhook test ── + 'webhook_test': { + 'title': '{hostname}: Webhook test received', + 'body': 'PVE webhook connectivity test successful.\n{reason}', + 'group': 'system', + 'default_enabled': True, + }, + + # ── Burst aggregation summaries ── + 'burst_auth_fail': { + 'title': '{hostname}: {count} auth failures in {window}', + 'body': '{count} authentication failures detected in {window}.\nSources: {entity_list}', + 'group': 'security', + 'default_enabled': True, + }, + 'burst_ip_block': { + 'title': '{hostname}: Fail2Ban banned {count} IPs in {window}', + 'body': '{count} IPs banned by Fail2Ban in {window}.\nIPs: {entity_list}', + 'group': 'security', + 'default_enabled': True, + }, + 'burst_disk_io': { + 'title': '{hostname}: {count} disk I/O errors on {entity_list}', + 'body': '{count} I/O errors detected in {window}.\nDevices: {entity_list}', + 'group': 'storage', + 'default_enabled': True, + }, + 'burst_cluster': { + 'title': '{hostname}: Cluster flapping detected ({count} changes)', + 'body': 'Cluster state changed {count} times in {window}.\nNodes: {entity_list}', + 'group': 'cluster', + 'default_enabled': True, + }, + 'burst_generic': { + 'title': '{hostname}: {count} {event_type} events in {window}', + 'body': '{count} events of type {event_type} in {window}.\n{entity_list}', + 'group': 'system', + 'default_enabled': True, + }, +} + +# ─── Event Groups (for UI filtering) ───────────────────────────── + +EVENT_GROUPS = { + 'system': {'label': 'System', 'description': 'System health, services, updates'}, + 'vm_ct': {'label': 'VM / CT', 'description': 'Virtual machines and containers'}, + 'backup': {'label': 'Backup', 'description': 'Backups and snapshots'}, + 'resources': {'label': 'Resources', 'description': 'CPU, memory, temperature, load'}, + 'storage': {'label': 'Storage', 'description': 'Disk space and I/O'}, + 'network': {'label': 'Network', 'description': 'Connectivity and latency'}, + 'security': {'label': 'Security', 
'description': 'Authentication, firewall, bans'}, + 'cluster': {'label': 'Cluster', 'description': 'Cluster health and quorum'}, +} + + +# ─── Template Renderer ─────────────────────────────────────────── + +def _get_hostname() -> str: + """Get short hostname for message titles.""" + try: + return socket.gethostname().split('.')[0] + except Exception: + return 'proxmox' + + +def render_template(event_type: str, data: Dict[str, Any]) -> Dict[str, Any]: + """Render a template into a structured notification object. + + Returns structured output usable by all channels: + title, body (text), body_text, body_html (escaped), fields, tags, severity, group + """ + import html as html_mod + + template = TEMPLATES.get(event_type) + if not template: + fallback_body = data.get('message', data.get('reason', str(data))) + severity = data.get('severity', 'INFO') + return { + 'title': f"{_get_hostname()}: {event_type}", + 'body': fallback_body, 'body_text': fallback_body, + 'body_html': f'

{html_mod.escape(str(fallback_body))}

', + 'fields': [], 'tags': [severity, 'system', event_type], + 'severity': severity, 'group': 'system', + } + + # Ensure hostname is always available + variables = { + 'hostname': _get_hostname(), + 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'), + 'severity': data.get('severity', 'INFO'), + # Burst event variables + 'window': '', 'entity_list': '', + # Common defaults + 'vmid': '', 'vmname': '', 'reason': '', 'summary': '', + 'details': '', 'category': '', 'previous': '', 'current': '', + 'duration': '', 'value': '', 'threshold': '', + 'source_ip': '', 'username': '', 'service': '', 'service_name': '', + 'node_name': '', 'target_node': '', 'mount': '', 'device': '', + 'used': '', 'total': '', 'available': '', 'cores': '', + 'count': '', 'size': '', 'snapshot_name': '', 'jail': '', + 'failures': '', 'quorum': '', 'change_details': '', 'message': '', + 'security_count': '0', 'total_count': '0', 'package_list': '', + 'packages': '', 'pve_packages': '', 'version': '', + 'issue_list': '', 'error_key': '', + 'storage_name': '', 'storage_type': '', + } + variables.update(data) + + try: + title = template['title'].format(**variables) + except (KeyError, ValueError): + title = template['title'] + + # ── PVE vzdump special formatting ── + # When the event came from PVE webhook with a full vzdump message, + # parse the table/logs and format a rich body instead of the sparse template. 
+ pve_message = data.get('pve_message', '') + pve_title = data.get('pve_title', '') + + if event_type in ('backup_complete', 'backup_fail') and pve_message: + parsed = _parse_vzdump_message(pve_message) + if parsed: + is_success = (event_type == 'backup_complete') + body_text = _format_vzdump_body(parsed, is_success) + # Use PVE's own title if available (contains hostname and status) + if pve_title: + title = pve_title + else: + # Couldn't parse -- use PVE raw message as body + body_text = pve_message.strip() + elif event_type == 'system_mail' and pve_message: + # System mail -- use PVE message directly (mail bounce, cron, smartd) + body_text = pve_message.strip()[:1000] + else: + try: + body_text = template['body'].format(**variables) + except (KeyError, ValueError): + body_text = template['body'] + + # Clean up: collapse runs of 3+ blank lines into 1, remove trailing whitespace + import re as _re + body_text = _re.sub(r'\n{3,}', '\n\n', body_text.strip()) + + severity = variables.get('severity', 'INFO') + group = template.get('group', 'system') + + # Build structured fields for Discord embeds / rich notifications + fields = [] + field_map = [ + ('vmid', 'VM/CT'), ('vmname', 'Name'), ('device', 'Device'), + ('source_ip', 'Source IP'), ('node_name', 'Node'), ('category', 'Category'), + ('service_name', 'Service'), ('jail', 'Jail'), ('username', 'User'), + ('count', 'Count'), ('window', 'Window'), ('entity_list', 'Affected'), + ] + for key, label in field_map: + val = variables.get(key, '') + if val: + fields.append((label, str(val))) + + # Build HTML body with escaped content + body_html_parts = [] + for line in body_text.split('\n'): + if line.strip(): + body_html_parts.append(f'

{html_mod.escape(line)}

') + body_html = '\n'.join(body_html_parts) if body_html_parts else f'

{html_mod.escape(body_text)}

' + + return { + 'title': title, + 'body': body_text, # backward compat + 'body_text': body_text, + 'body_html': body_html, + 'fields': fields, + 'tags': [severity, group, event_type], + 'severity': severity, + 'group': group, + } + + +def get_event_types_by_group() -> Dict[str, list]: + """Get all event types organized by group, for UI rendering. + + Returns: + {group_key: [{'type': event_type, 'title': template_title, + 'default_enabled': bool}, ...]} + """ + result = {} + for event_type, template in TEMPLATES.items(): + group = template.get('group', 'system') + if group not in result: + result[group] = [] + import re + # Clean title: remove {hostname}: prefix and any remaining {placeholders} + title = template['title'].replace('{hostname}', '').strip(': ') + title = re.sub(r'\s*\{[^}]+\}', '', title).strip(' -:') + if not title: + title = event_type.replace('_', ' ').title() + result[group].append({ + 'type': event_type, + 'title': title, + 'default_enabled': template.get('default_enabled', True), + }) + return result + + +def get_default_enabled_events() -> Dict[str, bool]: + """Get the default enabled state for all event types.""" + return { + event_type: template.get('default_enabled', True) + for event_type, template in TEMPLATES.items() + } + + +# ─── AI Enhancement (Optional) ─────────────────────────────────── + +class AIEnhancer: + """Optional AI message enhancement using external LLM API. + + Enriches template-generated messages with context and suggestions. + Falls back to original message if AI is unavailable or fails. + """ + + SYSTEM_PROMPT = """You are a Proxmox system administrator assistant. +You receive a notification message about a server event and must enhance it with: +1. A brief explanation of what this means in practical terms +2. A suggested action if applicable (1-2 sentences max) + +Keep the response concise (max 3 sentences total). Do not repeat the original message. 
+Respond in the same language as the input message.""" + + def __init__(self, provider: str, api_key: str, model: str = ''): + self.provider = provider.lower() + self.api_key = api_key + self.model = model + self._enabled = bool(api_key) + + @property + def enabled(self) -> bool: + return self._enabled + + def enhance(self, title: str, body: str, severity: str) -> Optional[str]: + """Enhance a notification message with AI context. + + Returns enhanced body text, or None if enhancement fails/disabled. + """ + if not self._enabled: + return None + + try: + if self.provider in ('openai', 'groq'): + return self._call_openai_compatible(title, body, severity) + except Exception as e: + print(f"[AIEnhancer] Enhancement failed: {e}") + + return None + + def _call_openai_compatible(self, title: str, body: str, severity: str) -> Optional[str]: + """Call OpenAI-compatible API (works with OpenAI, Groq, local).""" + if self.provider == 'groq': + url = 'https://api.groq.com/openai/v1/chat/completions' + model = self.model or 'llama-3.3-70b-versatile' + else: # openai + url = 'https://api.openai.com/v1/chat/completions' + model = self.model or 'gpt-4o-mini' + + user_msg = f"Severity: {severity}\nTitle: {title}\nMessage: {body}" + + payload = json.dumps({ + 'model': model, + 'messages': [ + {'role': 'system', 'content': self.SYSTEM_PROMPT}, + {'role': 'user', 'content': user_msg}, + ], + 'max_tokens': 150, + 'temperature': 0.3, + }).encode('utf-8') + + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}', + } + + req = urllib.request.Request(url, data=payload, headers=headers) + with urllib.request.urlopen(req, timeout=10) as resp: + result = json.loads(resp.read().decode('utf-8')) + content = result['choices'][0]['message']['content'].strip() + return content if content else None + + +def format_with_ai(title: str, body: str, severity: str, + ai_config: Dict[str, str]) -> str: + """Format a message with optional AI enhancement. 
+ + If AI is configured and succeeds, appends AI insight to the body. + Otherwise returns the original body unchanged. + + Args: + title: Notification title + body: Notification body + severity: Severity level + ai_config: {'enabled': 'true', 'provider': 'groq', 'api_key': '...', 'model': ''} + + Returns: + Enhanced body string + """ + if ai_config.get('enabled') != 'true' or not ai_config.get('api_key'): + return body + + enhancer = AIEnhancer( + provider=ai_config.get('provider', 'groq'), + api_key=ai_config['api_key'], + model=ai_config.get('model', ''), + ) + + insight = enhancer.enhance(title, body, severity) + if insight: + return f"{body}\n\n---\n{insight}" + + return body