Update notification service

2026-04-25 17:06:26 +00:00 · 2026-02-27 19:47:36 +01:00
parent be119a69af
commit 171e7ddcae
4 changed files with 84 additions and 43 deletions
--- a/AppImage/scripts/health_monitor.py
+++ b/AppImage/scripts/health_monitor.py
@@ -324,7 +324,8 @@ class HealthMonitor:
        Returns JSON structure with ALL 10 categories always present.
        Now includes persistent error tracking.
        """
-        # Run cleanup on every status check to auto-resolve stale errors
+        # Run cleanup on every status check so stale errors are auto-resolved
+        # using the user-configured Suppression Duration (single source of truth).
        try:
            health_persistence.cleanup_old_errors()
        except Exception:
@@ -2157,18 +2158,18 @@ class HealthMonitor:
                    # Get a representative critical error reason
                    representative_error = next(iter(critical_errors_found.values()))
                    reason = f'Critical error detected: {representative_error[:100]}'
-                elif cascade_count > 0:
-                    status = 'WARNING'
-                    samples = _get_samples(cascading_errors, 3)
-                    reason = f'Error cascade ({cascade_count} patterns repeating):\n' + '\n'.join(f'  - {s}' for s in samples)
-                elif spike_count > 0:
-                    status = 'WARNING'
-                    samples = _get_samples(spike_errors, 3)
-                    reason = f'Error spike ({spike_count} patterns with 4x increase):\n' + '\n'.join(f'  - {s}' for s in samples)
-                elif persistent_count > 0:
-                    status = 'WARNING'
-                    samples = _get_samples(persistent_errors, 3)
-                    reason = f'Persistent errors ({persistent_count} patterns over 15+ min):\n' + '\n'.join(f'  - {s}' for s in samples)
+                elif cascade_count > 0:
+                    status = 'WARNING'
+                    samples = _get_samples(cascading_errors, 3)
+                    reason = f'Error cascade ({cascade_count} patterns repeating):\n' + '\n'.join(f'  - {s}' for s in samples)
+                elif spike_count > 0:
+                    status = 'WARNING'
+                    samples = _get_samples(spike_errors, 3)
+                    reason = f'Error spike ({spike_count} patterns with 4x increase):\n' + '\n'.join(f'  - {s}' for s in samples)
+                elif persistent_count > 0:
+                    status = 'WARNING'
+                    samples = _get_samples(persistent_errors, 3)
+                    reason = f'Persistent errors ({persistent_count} patterns over 15+ min):\n' + '\n'.join(f'  - {s}' for s in samples)
                else:
                    # No significant issues found
                    status = 'OK'
@@ -2189,23 +2190,23 @@ class HealthMonitor:
                'log_critical_errors': {'active': unique_critical_count > 0, 'severity': 'CRITICAL',
                    'reason': f'{unique_critical_count} critical error(s) found', 'dismissable': False},
            }
-            
-            # Track which sub-checks were dismissed
-            dismissed_keys = set()
-            for err_key, info in log_sub_checks.items():
-                if info['active']:
-                    is_dismissable = info.get('dismissable', True)
-                    result = health_persistence.record_error(
-                        error_key=err_key,
-                        category='logs',
-                        severity=info['severity'],
-                        reason=info['reason'],
-                        details={'dismissable': is_dismissable}
-                    )
-                    if result and result.get('type') == 'skipped_acknowledged':
-                        dismissed_keys.add(err_key)
-                elif health_persistence.is_error_active(err_key):
-                    health_persistence.clear_error(err_key)
+                
+                # Track which sub-checks were dismissed
+                dismissed_keys = set()
+                for err_key, info in log_sub_checks.items():
+                    if info['active']:
+                        is_dismissable = info.get('dismissable', True)
+                        result = health_persistence.record_error(
+                            error_key=err_key,
+                            category='logs',
+                            severity=info['severity'],
+                            reason=info['reason'],
+                            details={'dismissable': is_dismissable}
+                        )
+                        if result and result.get('type') == 'skipped_acknowledged':
+                            dismissed_keys.add(err_key)
+                    elif health_persistence.is_error_active(err_key):
+                        health_persistence.clear_error(err_key)
                
                # Build checks dict - downgrade dismissed items to INFO
                def _log_check_status(key, active, severity):
--- a/AppImage/scripts/health_persistence.py
+++ b/AppImage/scripts/health_persistence.py
@@ -26,7 +26,7 @@ class HealthPersistence:
    """Manages persistent health error tracking"""
    
    # Default suppression duration when no user setting exists for a category.
-    # Users can override per-category via the Suppression Duration settings.
+    # Users override per-category via the Suppression Duration settings UI.
    DEFAULT_SUPPRESSION_HOURS = 24
    
    # Mapping from error categories to settings keys
@@ -498,13 +498,16 @@ class HealthPersistence:
        cutoff_resolved = (now - timedelta(days=7)).isoformat()
        cursor.execute('DELETE FROM errors WHERE resolved_at < ?', (cutoff_resolved,))
        
-        # ── Auto-resolve stale errors using user-configured Suppression Duration ──
-        # Read the per-category suppression hours from user_settings.
-        # If the user hasn't configured a category, fall back to DEFAULT_SUPPRESSION_HOURS.
+        # ── Auto-resolve stale errors using Suppression Duration settings ──
+        # Read per-category suppression hours from user_settings.
+        # If the user hasn't configured a value, use DEFAULT_SUPPRESSION_HOURS.
        # This is the SINGLE source of truth for auto-resolution timing.
        user_settings = {}
        try:
-            cursor.execute('SELECT setting_key, setting_value FROM user_settings WHERE setting_key LIKE ?', ('suppress_%',))
+            cursor.execute(
+                'SELECT setting_key, setting_value FROM user_settings WHERE setting_key LIKE ?',
+                ('suppress_%',)
+            )
            for row in cursor.fetchall():
                user_settings[row[0]] = row[1]
        except Exception:
@@ -517,6 +520,10 @@ class HealthPersistence:
            except (ValueError, TypeError):
                hours = self.DEFAULT_SUPPRESSION_HOURS
            
+            # Any negative value (-1 in settings) means permanently suppressed -- skip auto-resolve
+            if hours < 0:
+                continue
+            
            cutoff = (now - timedelta(hours=hours)).isoformat()
            cursor.execute('''
                UPDATE errors 
@@ -527,7 +534,7 @@ class HealthPersistence:
                  AND acknowledged = 0
            ''', (now_iso, category, cutoff))
        
-        # Catch-all: auto-resolve ANY error from an unmapped category
+        # Catch-all: auto-resolve any error from an unmapped category
        # whose last_seen exceeds DEFAULT_SUPPRESSION_HOURS.
        fallback_cutoff = (now - timedelta(hours=self.DEFAULT_SUPPRESSION_HOURS)).isoformat()
        cursor.execute('''