Update AppImage

This commit is contained in:
MacRimi
2025-11-09 18:23:27 +01:00
parent a59489f804
commit 8fb8134898
2 changed files with 73 additions and 29 deletions

View File

@@ -1170,6 +1170,7 @@ class HealthMonitor:
"""
Check logs with persistent error tracking.
Critical log errors persist for 24h unless acknowledged.
Groups similar errors to avoid false high counts.
"""
cache_key = 'logs_analysis'
current_time = time.time()
@@ -1196,20 +1197,24 @@ class HealthMonitor:
if result.returncode == 0:
lines = result.stdout.strip().split('\n')
errors_5m = 0
warnings_5m = 0
critical_keywords_found = []
error_patterns = {} # pattern -> count
critical_keywords_found = set()
for line in lines:
if not line.strip():
continue
line_lower = line.lower()
# Check for critical keywords first
critical_found = False
for keyword in self.CRITICAL_LOG_KEYWORDS:
if keyword.lower() in line_lower:
critical_keywords_found.append(keyword)
errors_5m += 1
critical_keywords_found.add(keyword)
critical_found = True
# Record persistent error for critical keywords
error_key = f'log_critical_{keyword.replace(" ", "_")}'
error_key = f'log_critical_{keyword.replace(" ", "_").replace("/", "_")}'
health_persistence.record_error(
error_key=error_key,
category='logs',
@@ -1218,27 +1223,42 @@ class HealthMonitor:
details={'keyword': keyword}
)
break
else:
if 'error' in line_lower or 'critical' in line_lower or 'fatal' in line_lower:
errors_5m += 1
elif 'warning' in line_lower or 'warn' in line_lower:
warnings_5m += 1
if critical_found:
continue
# Remove timestamps, PIDs, and specific IDs to group similar errors
pattern = re.sub(r'\d{4}-\d{2}-\d{2}', '', line_lower) # Remove dates
pattern = re.sub(r'\d{2}:\d{2}:\d{2}', '', pattern) # Remove times
pattern = re.sub(r'pid[:\s]+\d+', 'pid:XXX', pattern) # Normalize PIDs
pattern = re.sub(r'\b\d{3,6}\b', 'ID', pattern) # Normalize IDs
pattern = re.sub(r'/dev/\S+', '/dev/XXX', pattern) # Normalize devices
pattern = pattern[:100] # Keep first 100 chars as pattern
# Classify error level
if 'error' in line_lower or 'critical' in line_lower or 'fatal' in line_lower:
error_patterns[f'error:{pattern}'] = error_patterns.get(f'error:{pattern}', 0) + 1
elif 'warning' in line_lower or 'warn' in line_lower:
error_patterns[f'warning:{pattern}'] = error_patterns.get(f'warning:{pattern}', 0) + 1
unique_errors = sum(1 for k in error_patterns.keys() if k.startswith('error:'))
unique_warnings = sum(1 for k in error_patterns.keys() if k.startswith('warning:'))
if critical_keywords_found:
status = 'CRITICAL'
reason = f'Critical errors: {", ".join(set(critical_keywords_found[:3]))}'
elif errors_5m >= self.LOG_ERRORS_CRITICAL:
reason = f'Critical errors: {", ".join(list(critical_keywords_found)[:3])}'
elif unique_errors >= self.LOG_ERRORS_CRITICAL:
status = 'CRITICAL'
reason = f'{errors_5m} errors in 5 minutes'
elif warnings_5m >= self.LOG_WARNINGS_CRITICAL:
reason = f'{unique_errors} unique errors in 5 minutes'
elif unique_warnings >= self.LOG_WARNINGS_CRITICAL:
status = 'WARNING'
reason = f'{warnings_5m} warnings in 5 minutes'
elif errors_5m >= self.LOG_ERRORS_WARNING:
reason = f'{unique_warnings} unique warnings in 5 minutes'
elif unique_errors >= self.LOG_ERRORS_WARNING:
status = 'WARNING'
reason = f'{errors_5m} errors in 5 minutes'
elif warnings_5m >= self.LOG_WARNINGS_WARNING:
reason = f'{unique_errors} unique errors in 5 minutes'
elif unique_warnings >= self.LOG_WARNINGS_WARNING:
status = 'WARNING'
reason = f'{warnings_5m} warnings in 5 minutes'
reason = f'{unique_warnings} unique warnings in 5 minutes'
else:
status = 'OK'
reason = None

View File

@@ -101,18 +101,23 @@ class HealthPersistence:
if existing:
error_id, first_seen, notif_sent, acknowledged, resolved_at = existing
# If acknowledged within last 24 hours, do not re-add unless resolved and re-occurred
if acknowledged and not resolved_at:
first_seen_dt = datetime.fromisoformat(first_seen)
if (datetime.now() - first_seen_dt).total_seconds() < 86400: # 24 hours
# Skip re-adding recently acknowledged errors
conn.close()
return {'type': 'skipped', 'needs_notification': False}
if acknowledged == 1 and resolved_at is not None:
# Check if acknowledged recently (within 24h)
try:
resolved_dt = datetime.fromisoformat(resolved_at)
hours_since_ack = (datetime.now() - resolved_dt).total_seconds() / 3600
if hours_since_ack < 24:
# Skip re-adding recently acknowledged errors
conn.close()
return {'type': 'skipped_acknowledged', 'needs_notification': False}
except Exception:
pass
# Update existing error
# Update existing error (only if not acknowledged or >24h passed)
cursor.execute('''
UPDATE errors
SET last_seen = ?, severity = ?, reason = ?, details = ?, resolved_at = NULL
SET last_seen = ?, severity = ?, reason = ?, details = ?, resolved_at = NULL, acknowledged = 0
WHERE error_key = ?
''', (now, severity, reason, details_json, error_key))
@@ -123,6 +128,25 @@ class HealthPersistence:
event_info['type'] = 'escalated'
event_info['needs_notification'] = True
else:
cursor.execute('''
SELECT resolved_at, acknowledged FROM errors
WHERE error_key = ? AND acknowledged = 1
ORDER BY resolved_at DESC LIMIT 1
''', (error_key,))
recent_ack = cursor.fetchone()
if recent_ack and recent_ack[0]:
try:
resolved_dt = datetime.fromisoformat(recent_ack[0])
hours_since_ack = (datetime.now() - resolved_dt).total_seconds() / 3600
if hours_since_ack < 24:
# Don't re-add recently acknowledged errors
conn.close()
return {'type': 'skipped_acknowledged', 'needs_notification': False}
except Exception:
pass
# Insert new error
cursor.execute('''
INSERT INTO errors