mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-25 17:06:26 +00:00
Update notification service
This commit is contained in:
@@ -324,7 +324,8 @@ class HealthMonitor:
|
||||
Returns JSON structure with ALL 10 categories always present.
|
||||
Now includes persistent error tracking.
|
||||
"""
|
||||
# Run cleanup on every status check to auto-resolve stale errors
|
||||
# Run cleanup on every status check so stale errors are auto-resolved
|
||||
# using the user-configured Suppression Duration (single source of truth).
|
||||
try:
|
||||
health_persistence.cleanup_old_errors()
|
||||
except Exception:
|
||||
@@ -2157,18 +2158,18 @@ class HealthMonitor:
|
||||
# Get a representative critical error reason
|
||||
representative_error = next(iter(critical_errors_found.values()))
|
||||
reason = f'Critical error detected: {representative_error[:100]}'
|
||||
elif cascade_count > 0:
|
||||
status = 'WARNING'
|
||||
samples = _get_samples(cascading_errors, 3)
|
||||
reason = f'Error cascade ({cascade_count} patterns repeating):\n' + '\n'.join(f' - {s}' for s in samples)
|
||||
elif spike_count > 0:
|
||||
status = 'WARNING'
|
||||
samples = _get_samples(spike_errors, 3)
|
||||
reason = f'Error spike ({spike_count} patterns with 4x increase):\n' + '\n'.join(f' - {s}' for s in samples)
|
||||
elif persistent_count > 0:
|
||||
status = 'WARNING'
|
||||
samples = _get_samples(persistent_errors, 3)
|
||||
reason = f'Persistent errors ({persistent_count} patterns over 15+ min):\n' + '\n'.join(f' - {s}' for s in samples)
|
||||
elif cascade_count > 0:
|
||||
status = 'WARNING'
|
||||
samples = _get_samples(cascading_errors, 3)
|
||||
reason = f'Error cascade ({cascade_count} patterns repeating):\n' + '\n'.join(f' - {s}' for s in samples)
|
||||
elif spike_count > 0:
|
||||
status = 'WARNING'
|
||||
samples = _get_samples(spike_errors, 3)
|
||||
reason = f'Error spike ({spike_count} patterns with 4x increase):\n' + '\n'.join(f' - {s}' for s in samples)
|
||||
elif persistent_count > 0:
|
||||
status = 'WARNING'
|
||||
samples = _get_samples(persistent_errors, 3)
|
||||
reason = f'Persistent errors ({persistent_count} patterns over 15+ min):\n' + '\n'.join(f' - {s}' for s in samples)
|
||||
else:
|
||||
# No significant issues found
|
||||
status = 'OK'
|
||||
@@ -2189,23 +2190,23 @@ class HealthMonitor:
|
||||
'log_critical_errors': {'active': unique_critical_count > 0, 'severity': 'CRITICAL',
|
||||
'reason': f'{unique_critical_count} critical error(s) found', 'dismissable': False},
|
||||
}
|
||||
|
||||
# Track which sub-checks were dismissed
|
||||
dismissed_keys = set()
|
||||
for err_key, info in log_sub_checks.items():
|
||||
if info['active']:
|
||||
is_dismissable = info.get('dismissable', True)
|
||||
result = health_persistence.record_error(
|
||||
error_key=err_key,
|
||||
category='logs',
|
||||
severity=info['severity'],
|
||||
reason=info['reason'],
|
||||
details={'dismissable': is_dismissable}
|
||||
)
|
||||
if result and result.get('type') == 'skipped_acknowledged':
|
||||
dismissed_keys.add(err_key)
|
||||
elif health_persistence.is_error_active(err_key):
|
||||
health_persistence.clear_error(err_key)
|
||||
|
||||
# Track which sub-checks were dismissed
|
||||
dismissed_keys = set()
|
||||
for err_key, info in log_sub_checks.items():
|
||||
if info['active']:
|
||||
is_dismissable = info.get('dismissable', True)
|
||||
result = health_persistence.record_error(
|
||||
error_key=err_key,
|
||||
category='logs',
|
||||
severity=info['severity'],
|
||||
reason=info['reason'],
|
||||
details={'dismissable': is_dismissable}
|
||||
)
|
||||
if result and result.get('type') == 'skipped_acknowledged':
|
||||
dismissed_keys.add(err_key)
|
||||
elif health_persistence.is_error_active(err_key):
|
||||
health_persistence.clear_error(err_key)
|
||||
|
||||
# Build checks dict - downgrade dismissed items to INFO
|
||||
def _log_check_status(key, active, severity):
|
||||
|
||||
@@ -26,7 +26,7 @@ class HealthPersistence:
|
||||
"""Manages persistent health error tracking"""
|
||||
|
||||
# Default suppression duration when no user setting exists for a category.
|
||||
# Users can override per-category via the Suppression Duration settings.
|
||||
# Users override per-category via the Suppression Duration settings UI.
|
||||
DEFAULT_SUPPRESSION_HOURS = 24
|
||||
|
||||
# Mapping from error categories to settings keys
|
||||
@@ -498,13 +498,16 @@ class HealthPersistence:
|
||||
cutoff_resolved = (now - timedelta(days=7)).isoformat()
|
||||
cursor.execute('DELETE FROM errors WHERE resolved_at < ?', (cutoff_resolved,))
|
||||
|
||||
# ── Auto-resolve stale errors using user-configured Suppression Duration ──
|
||||
# Read the per-category suppression hours from user_settings.
|
||||
# If the user hasn't configured a category, fall back to DEFAULT_SUPPRESSION_HOURS.
|
||||
# ── Auto-resolve stale errors using Suppression Duration settings ──
|
||||
# Read per-category suppression hours from user_settings.
|
||||
# If the user hasn't configured a value, use DEFAULT_SUPPRESSION_HOURS.
|
||||
# This is the SINGLE source of truth for auto-resolution timing.
|
||||
user_settings = {}
|
||||
try:
|
||||
cursor.execute('SELECT setting_key, setting_value FROM user_settings WHERE setting_key LIKE ?', ('suppress_%',))
|
||||
cursor.execute(
|
||||
'SELECT setting_key, setting_value FROM user_settings WHERE setting_key LIKE ?',
|
||||
('suppress_%',)
|
||||
)
|
||||
for row in cursor.fetchall():
|
||||
user_settings[row[0]] = row[1]
|
||||
except Exception:
|
||||
@@ -517,6 +520,10 @@ class HealthPersistence:
|
||||
except (ValueError, TypeError):
|
||||
hours = self.DEFAULT_SUPPRESSION_HOURS
|
||||
|
||||
# -1 means permanently suppressed -- skip auto-resolve
|
||||
if hours < 0:
|
||||
continue
|
||||
|
||||
cutoff = (now - timedelta(hours=hours)).isoformat()
|
||||
cursor.execute('''
|
||||
UPDATE errors
|
||||
@@ -527,7 +534,7 @@ class HealthPersistence:
|
||||
AND acknowledged = 0
|
||||
''', (now_iso, category, cutoff))
|
||||
|
||||
# Catch-all: auto-resolve ANY error from an unmapped category
|
||||
# Catch-all: auto-resolve any error from an unmapped category
|
||||
# whose last_seen exceeds DEFAULT_SUPPRESSION_HOURS.
|
||||
fallback_cutoff = (now - timedelta(hours=self.DEFAULT_SUPPRESSION_HOURS)).isoformat()
|
||||
cursor.execute('''
|
||||
|
||||
Reference in New Issue
Block a user