From 171e7ddcae88a47dae0fc7d2c336d9a26cc457bc Mon Sep 17 00:00:00 2001
From: MacRimi
Date: Fri, 27 Feb 2026 19:47:36 +0100
Subject: [PATCH] Update notification service

---
 AppImage/components/health-status-modal.tsx | 14 ++---
 AppImage/components/storage-overview.tsx    | 33 +++++++++++
 AppImage/scripts/health_monitor.py          | 61 +++++++++++----------
 AppImage/scripts/health_persistence.py      | 19 +++++--
 4 files changed, 84 insertions(+), 43 deletions(-)

diff --git a/AppImage/components/health-status-modal.tsx b/AppImage/components/health-status-modal.tsx
index b6da6742..b0d015cf 100644
--- a/AppImage/components/health-status-modal.tsx
+++ b/AppImage/components/health-status-modal.tsx
@@ -408,10 +408,10 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
                   key={checkKey}
                   className="flex items-center justify-between gap-1.5 sm:gap-2 text-[10px] sm:text-xs py-1.5 px-2 sm:px-3 rounded-md hover:bg-muted/40 transition-colors"
                 >
-
-                    {getStatusIcon(checkData.status, "sm")}
+
+                    {getStatusIcon(checkData.status, "sm")}
                     {formatCheckLabel(checkKey)}
-                  {checkData.detail}
+                  {checkData.detail}
                   {checkData.dismissed && (
                     Dismissed
@@ -520,8 +520,8 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
         {healthData.summary && healthData.summary !== "All systems operational" && (
-
-            {healthData.summary}
+
+            {healthData.summary}
         )}
@@ -559,7 +559,7 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
         )}
         {reason && !isExpanded && (
-            {reason}
+            {reason}
         )}
@@ -578,7 +578,7 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
           {isExpanded && (
             {reason && (
-                {reason}
+                {reason}
             )}
             {hasChecks ? (
               renderChecks(checks, key)
diff --git a/AppImage/components/storage-overview.tsx b/AppImage/components/storage-overview.tsx
index a9a4cf79..648993b9 100644
--- a/AppImage/components/storage-overview.tsx
+++ b/AppImage/components/storage-overview.tsx
@@ -34,6 +34,12 @@ interface DiskInfo {
   wear_leveling_count?: number // SSD: Wear Leveling Count
   total_lbas_written?: number // SSD/NVMe: Total LBAs Written (GB)
   ssd_life_left?: number // SSD: SSD Life Left percentage
+  io_errors?: {
+    count: number
+    severity: string
+    sample: string
+    reason: string
+  }
 }
 
 interface ZFSPool {
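[Reviewer note] The new optional io_errors field on DiskInfo is the UI-side
contract for per-disk I/O error reporting; the producer lives in the Python
monitor and is not part of this patch. A minimal sketch of a compatible
payload builder, assuming dmesg-style input over the 5-minute window the UI
mentions (function names, the severity threshold, and the log format here
are illustrative, not from this patch):

    import re

    # Hypothetical producer for DiskInfo.io_errors: scan recent kernel log
    # lines for a device and summarize as {count, severity, sample, reason}.
    def build_io_errors(recent_log_lines, device="sda"):
        pattern = re.compile(rf"I/O error.*\b{re.escape(device)}\b")
        hits = [line for line in recent_log_lines if pattern.search(line)]
        if not hits:
            return None
        return {
            "count": len(hits),
            "severity": "CRITICAL" if len(hits) >= 10 else "WARNING",  # assumed threshold
            "sample": hits[-1][:200],  # most recent matching line, truncated
            "reason": f"{len(hits)} I/O error(s) on /dev/{device} in the last 5 minutes",
        }

    if __name__ == "__main__":
        demo = ["blk_update_request: I/O error, dev sda, sector 12345"]
        print(build_io_errors(demo))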
@@ -776,6 +782,17 @@ export function StorageOverview() {
+                  {disk.io_errors && disk.io_errors.count > 0 && (
+
+
+                      {disk.io_errors.count} I/O error{disk.io_errors.count !== 1 ? 's' : ''} in 5 min
+
+                  )}
+
                   {disk.size_formatted && (
@@ -841,6 +858,22 @@ export function StorageOverview() {
+              {disk.io_errors && disk.io_errors.count > 0 && (
+
+
+                  {disk.io_errors.count} I/O error{disk.io_errors.count !== 1 ? 's' : ''} in 5 min
+                  {disk.io_errors.sample && (
+
+                      {disk.io_errors.sample}
+
+                  )}
+
+              )}
+
               {disk.size_formatted && (
diff --git a/AppImage/scripts/health_monitor.py b/AppImage/scripts/health_monitor.py
index 4e8a3c48..97966d5d 100644
--- a/AppImage/scripts/health_monitor.py
+++ b/AppImage/scripts/health_monitor.py
@@ -324,7 +324,8 @@ class HealthMonitor:
         Returns JSON structure with ALL 10 categories always present.
         Now includes persistent error tracking.
         """
-        # Run cleanup on every status check to auto-resolve stale errors
+        # Run cleanup on every status check so stale errors are auto-resolved
+        # using the user-configured Suppression Duration (single source of truth).
         try:
             health_persistence.cleanup_old_errors()
         except Exception:
@@ -2157,18 +2158,18 @@ class HealthMonitor:
             # Get a representative critical error reason
             representative_error = next(iter(critical_errors_found.values()))
             reason = f'Critical error detected: {representative_error[:100]}'
-        elif cascade_count > 0:
-            status = 'WARNING'
-            samples = _get_samples(cascading_errors, 3)
-            reason = f'Error cascade ({cascade_count} patterns repeating):\n' + '\n'.join(f' - {s}' for s in samples)
-        elif spike_count > 0:
-            status = 'WARNING'
-            samples = _get_samples(spike_errors, 3)
-            reason = f'Error spike ({spike_count} patterns with 4x increase):\n' + '\n'.join(f' - {s}' for s in samples)
-        elif persistent_count > 0:
-            status = 'WARNING'
-            samples = _get_samples(persistent_errors, 3)
-            reason = f'Persistent errors ({persistent_count} patterns over 15+ min):\n' + '\n'.join(f' - {s}' for s in samples)
+        elif cascade_count > 0:
+            status = 'WARNING'
+            samples = _get_samples(cascading_errors, 3)
+            reason = f'Error cascade ({cascade_count} patterns repeating):\n' + '\n'.join(f' - {s}' for s in samples)
+        elif spike_count > 0:
+            status = 'WARNING'
+            samples = _get_samples(spike_errors, 3)
+            reason = f'Error spike ({spike_count} patterns with 4x increase):\n' + '\n'.join(f' - {s}' for s in samples)
+        elif persistent_count > 0:
+            status = 'WARNING'
+            samples = _get_samples(persistent_errors, 3)
+            reason = f'Persistent errors ({persistent_count} patterns over 15+ min):\n' + '\n'.join(f' - {s}' for s in samples)
         else:
             # No significant issues found
             status = 'OK'
@@ -2189,23 +2190,23 @@ class HealthMonitor:
             'log_critical_errors': {'active': unique_critical_count > 0, 'severity': 'CRITICAL', 'reason': f'{unique_critical_count} critical error(s) found', 'dismissable': False},
         }
-
-        # Track which sub-checks were dismissed
-        dismissed_keys = set()
-        for err_key, info in log_sub_checks.items():
-            if info['active']:
-                is_dismissable = info.get('dismissable', True)
-                result = health_persistence.record_error(
-                    error_key=err_key,
-                    category='logs',
-                    severity=info['severity'],
-                    reason=info['reason'],
-                    details={'dismissable': is_dismissable}
-                )
-                if result and result.get('type') == 'skipped_acknowledged':
-                    dismissed_keys.add(err_key)
-            elif health_persistence.is_error_active(err_key):
-                health_persistence.clear_error(err_key)
+
+        # Track which sub-checks were dismissed
+        dismissed_keys = set()
+        for err_key, info in log_sub_checks.items():
+            if info['active']:
+                is_dismissable = info.get('dismissable', True)
+                result = health_persistence.record_error(
+                    error_key=err_key,
+                    category='logs',
+                    severity=info['severity'],
+                    reason=info['reason'],
+                    details={'dismissable': is_dismissable}
+                )
+                if result and result.get('type') == 'skipped_acknowledged':
+                    dismissed_keys.add(err_key)
+            elif health_persistence.is_error_active(err_key):
+                health_persistence.clear_error(err_key)
 
         # Build checks dict - downgrade dismissed items to INFO
         def _log_check_status(key, active, severity):
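[Reviewer note] The loop above defines the dismissal contract with
health_persistence: record_error() returns {'type': 'skipped_acknowledged'}
for a sub-check the user has dismissed, so the caller downgrades it instead
of re-alerting, and checks that are no longer active get cleared. A minimal
in-memory stand-in for that contract, handy for unit-testing the loop; only
the three call signatures are taken from this patch, the storage behind them
is illustrative (the real module is SQLite-backed):

    # In-memory stand-in for health_persistence (illustrative storage).
    _active, _acknowledged = {}, {"log_error_spike"}

    def record_error(error_key, category, severity, reason, details=None):
        if error_key in _acknowledged:
            # User dismissed this check: tell the caller to downgrade it.
            return {"type": "skipped_acknowledged"}
        _active[error_key] = {"category": category, "severity": severity, "reason": reason}
        return {"type": "recorded"}

    def is_error_active(error_key):
        return error_key in _active

    def clear_error(error_key):
        _active.pop(error_key, None)

    print(record_error("log_error_spike", "logs", "WARNING", "spike"))
    # -> {'type': 'skipped_acknowledged'}
    print(record_error("log_critical_errors", "logs", "CRITICAL", "1 critical error(s) found"))
    # -> {'type': 'recorded'}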
diff --git a/AppImage/scripts/health_persistence.py b/AppImage/scripts/health_persistence.py
index 53ebe257..fede9b53 100644
--- a/AppImage/scripts/health_persistence.py
+++ b/AppImage/scripts/health_persistence.py
@@ -26,7 +26,7 @@ class HealthPersistence:
     """Manages persistent health error tracking"""
 
     # Default suppression duration when no user setting exists for a category.
-    # Users can override per-category via the Suppression Duration settings.
+    # Users override per-category via the Suppression Duration settings UI.
     DEFAULT_SUPPRESSION_HOURS = 24
 
     # Mapping from error categories to settings keys
@@ -498,13 +498,16 @@ class HealthPersistence:
         cutoff_resolved = (now - timedelta(days=7)).isoformat()
         cursor.execute('DELETE FROM errors WHERE resolved_at < ?', (cutoff_resolved,))
 
-        # ── Auto-resolve stale errors using user-configured Suppression Duration ──
-        # Read the per-category suppression hours from user_settings.
-        # If the user hasn't configured a category, fall back to DEFAULT_SUPPRESSION_HOURS.
+        # ── Auto-resolve stale errors using Suppression Duration settings ──
+        # Read per-category suppression hours from user_settings.
+        # If the user hasn't configured a value, use DEFAULT_SUPPRESSION_HOURS.
         # This is the SINGLE source of truth for auto-resolution timing.
         user_settings = {}
         try:
-            cursor.execute('SELECT setting_key, setting_value FROM user_settings WHERE setting_key LIKE ?', ('suppress_%',))
+            cursor.execute(
+                'SELECT setting_key, setting_value FROM user_settings WHERE setting_key LIKE ?',
+                ('suppress_%',)
+            )
             for row in cursor.fetchall():
                 user_settings[row[0]] = row[1]
         except Exception:
@@ -517,6 +520,10 @@ class HealthPersistence:
             except (ValueError, TypeError):
                 hours = self.DEFAULT_SUPPRESSION_HOURS
 
+            # -1 means permanently suppressed -- skip auto-resolve
+            if hours < 0:
+                continue
+
             cutoff = (now - timedelta(hours=hours)).isoformat()
             cursor.execute('''
                 UPDATE errors
                 SET resolved = 1, resolved_at = ?
                 WHERE category = ?
                 AND last_seen < ?
                 AND resolved = 0
                 AND acknowledged = 0
             ''', (now_iso, category, cutoff))
 
-        # Catch-all: auto-resolve ANY error from an unmapped category
+        # Catch-all: auto-resolve any error from an unmapped category
         # whose last_seen exceeds DEFAULT_SUPPRESSION_HOURS.
         fallback_cutoff = (now - timedelta(hours=self.DEFAULT_SUPPRESSION_HOURS)).isoformat()
         cursor.execute('''
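[Reviewer note] With this patch, cleanup_old_errors() is the single
auto-resolve path: a per-category suppress_* row in user_settings overrides
DEFAULT_SUPPRESSION_HOURS, and -1 turns auto-resolution off for that
category entirely. A self-contained sketch of just that timing rule (the
suppress_<category> key naming is illustrative since the category-to-key
mapping is not shown here, and the real query also filters on
resolved/acknowledged):

    from datetime import datetime, timedelta

    DEFAULT_SUPPRESSION_HOURS = 24

    def auto_resolve_cutoff(user_settings, category, now=None):
        """Return the last_seen cutoff for auto-resolving a category,
        or None when the user chose -1 (never auto-resolve)."""
        now = now or datetime.now()
        raw = user_settings.get(f"suppress_{category}", DEFAULT_SUPPRESSION_HOURS)
        try:
            hours = int(raw)
        except (ValueError, TypeError):
            hours = DEFAULT_SUPPRESSION_HOURS
        if hours < 0:
            return None  # permanently suppressed: skip the UPDATE entirely
        return (now - timedelta(hours=hours)).isoformat()

    # Example: logs auto-resolve after a 6h override, disks never do,
    # anything unconfigured falls back to the 24h default.
    settings = {"suppress_logs": "6", "suppress_disks": "-1"}
    print(auto_resolve_cutoff(settings, "logs"))
    print(auto_resolve_cutoff(settings, "disks"))
    print(auto_resolve_cutoff(settings, "network"))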