Update AppImage
Commit to https://github.com/MacRimi/ProxMenux.git
@@ -1170,6 +1170,7 @@ class HealthMonitor:
         """
         Check logs with persistent error tracking.
         Critical log errors persist for 24h unless acknowledged.
+        Groups similar errors to avoid false high counts.
         """
         cache_key = 'logs_analysis'
         current_time = time.time()
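The cache_key / current_time pair in the context above feeds a time-based cache whose check-and-store logic sits outside this hunk. A minimal sketch of that pattern, assuming a plain dict cache and a hypothetical 60-second TTL (neither the cache structure nor the TTL value comes from this diff):

import time

_cache = {}  # cache_key -> (timestamp, result); hypothetical, not the real cache
CACHE_TTL = 60  # hypothetical TTL in seconds

def cached_logs_analysis(compute):
    """Return the cached analysis while it is fresh, otherwise recompute and store it."""
    cache_key = 'logs_analysis'
    current_time = time.time()
    entry = _cache.get(cache_key)
    if entry and current_time - entry[0] < CACHE_TTL:
        return entry[1]
    result = compute()
    _cache[cache_key] = (current_time, result)
    return result

print(cached_logs_analysis(lambda: {'status': 'OK'}))  # computes once, then serves the cache for 60s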
@@ -1196,20 +1197,24 @@ class HealthMonitor:
             if result.returncode == 0:
                 lines = result.stdout.strip().split('\n')
 
-                errors_5m = 0
-                warnings_5m = 0
-                critical_keywords_found = []
+                error_patterns = {} # pattern -> count
+                critical_keywords_found = set()
 
                 for line in lines:
+                    if not line.strip():
+                        continue
+
                     line_lower = line.lower()
 
+                    # Check for critical keywords first
+                    critical_found = False
                     for keyword in self.CRITICAL_LOG_KEYWORDS:
                         if keyword.lower() in line_lower:
-                            critical_keywords_found.append(keyword)
-                            errors_5m += 1
+                            critical_keywords_found.add(keyword)
+                            critical_found = True
 
                             # Record persistent error for critical keywords
-                            error_key = f'log_critical_{keyword.replace(" ", "_")}'
+                            error_key = f'log_critical_{keyword.replace(" ", "_").replace("/", "_")}'
                             health_persistence.record_error(
                                 error_key=error_key,
                                 category='logs',
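A standalone sketch of the keyword scan added above, with a hypothetical CRITICAL_LOG_KEYWORDS value and a plain list standing in for health_persistence.record_error (both are stand-ins, not taken from this commit). It illustrates why the set and the extra .replace("/", "_") matter: duplicate hits collapse, and slashes cannot leak into the persisted error key:

CRITICAL_LOG_KEYWORDS = ['I/O error', 'kernel panic']  # hypothetical examples

def scan_lines(lines):
    critical_keywords_found = set()
    recorded_keys = []
    for line in lines:
        if not line.strip():
            continue
        line_lower = line.lower()
        for keyword in CRITICAL_LOG_KEYWORDS:
            if keyword.lower() in line_lower:
                critical_keywords_found.add(keyword)
                # Same key normalization as the diff: spaces and slashes become underscores
                error_key = f'log_critical_{keyword.replace(" ", "_").replace("/", "_")}'
                recorded_keys.append(error_key)  # stands in for health_persistence.record_error(...)
                break
    return critical_keywords_found, recorded_keys

print(scan_lines(['Oct 10 sda1: I/O error', 'Oct 10 sda1: I/O error', '']))
# ({'I/O error'}, ['log_critical_I_O_error', 'log_critical_I_O_error'])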
@@ -1218,27 +1223,42 @@ class HealthMonitor:
                                 details={'keyword': keyword}
                             )
                             break
-                    else:
-                        if 'error' in line_lower or 'critical' in line_lower or 'fatal' in line_lower:
-                            errors_5m += 1
-                        elif 'warning' in line_lower or 'warn' in line_lower:
-                            warnings_5m += 1
+
+                    if critical_found:
+                        continue
+
+                    # Remove timestamps, PIDs, and specific IDs to group similar errors
+                    pattern = re.sub(r'\d{4}-\d{2}-\d{2}', '', line_lower) # Remove dates
+                    pattern = re.sub(r'\d{2}:\d{2}:\d{2}', '', pattern) # Remove times
+                    pattern = re.sub(r'pid[:\s]+\d+', 'pid:XXX', pattern) # Normalize PIDs
+                    pattern = re.sub(r'\b\d{3,6}\b', 'ID', pattern) # Normalize IDs
+                    pattern = re.sub(r'/dev/\S+', '/dev/XXX', pattern) # Normalize devices
+                    pattern = pattern[:100] # Keep first 100 chars as pattern
+
+                    # Classify error level
+                    if 'error' in line_lower or 'critical' in line_lower or 'fatal' in line_lower:
+                        error_patterns[f'error:{pattern}'] = error_patterns.get(f'error:{pattern}', 0) + 1
+                    elif 'warning' in line_lower or 'warn' in line_lower:
+                        error_patterns[f'warning:{pattern}'] = error_patterns.get(f'warning:{pattern}', 0) + 1
+
+                unique_errors = sum(1 for k in error_patterns.keys() if k.startswith('error:'))
+                unique_warnings = sum(1 for k in error_patterns.keys() if k.startswith('warning:'))
 
                 if critical_keywords_found:
                     status = 'CRITICAL'
-                    reason = f'Critical errors: {", ".join(set(critical_keywords_found[:3]))}'
-                elif errors_5m >= self.LOG_ERRORS_CRITICAL:
+                    reason = f'Critical errors: {", ".join(list(critical_keywords_found)[:3])}'
+                elif unique_errors >= self.LOG_ERRORS_CRITICAL:
                     status = 'CRITICAL'
-                    reason = f'{errors_5m} errors in 5 minutes'
-                elif warnings_5m >= self.LOG_WARNINGS_CRITICAL:
+                    reason = f'{unique_errors} unique errors in 5 minutes'
+                elif unique_warnings >= self.LOG_WARNINGS_CRITICAL:
                     status = 'WARNING'
-                    reason = f'{warnings_5m} warnings in 5 minutes'
-                elif errors_5m >= self.LOG_ERRORS_WARNING:
+                    reason = f'{unique_warnings} unique warnings in 5 minutes'
+                elif unique_errors >= self.LOG_ERRORS_WARNING:
                     status = 'WARNING'
-                    reason = f'{errors_5m} errors in 5 minutes'
-                elif warnings_5m >= self.LOG_WARNINGS_WARNING:
+                    reason = f'{unique_errors} unique errors in 5 minutes'
+                elif unique_warnings >= self.LOG_WARNINGS_WARNING:
                     status = 'WARNING'
-                    reason = f'{warnings_5m} warnings in 5 minutes'
+                    reason = f'{unique_warnings} unique warnings in 5 minutes'
                 else:
                     status = 'OK'
                     reason = None
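A self-contained sketch of the grouping step introduced above: the same re.sub chain applied to two hypothetical journal lines that describe the same fault on different devices. Both normalize to one pattern, so they count as a single unique error instead of inflating the total:

import re

def normalize(line_lower):
    pattern = re.sub(r'\d{4}-\d{2}-\d{2}', '', line_lower)  # Remove dates
    pattern = re.sub(r'\d{2}:\d{2}:\d{2}', '', pattern)     # Remove times
    pattern = re.sub(r'pid[:\s]+\d+', 'pid:XXX', pattern)   # Normalize PIDs
    pattern = re.sub(r'\b\d{3,6}\b', 'ID', pattern)         # Normalize IDs
    pattern = re.sub(r'/dev/\S+', '/dev/XXX', pattern)      # Normalize devices
    return pattern[:100]

lines = [
    '2025-11-17 10:01:02 error: read failure on /dev/sda pid: 1234',
    '2025-11-17 10:05:09 error: read failure on /dev/sdb pid: 5678',
]
error_patterns = {}
for line in lines:
    key = f'error:{normalize(line.lower())}'
    error_patterns[key] = error_patterns.get(key, 0) + 1

unique_errors = sum(1 for k in error_patterns if k.startswith('error:'))
print(unique_errors)  # 1 -- both lines reduce to the same pattern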
@@ -101,18 +101,23 @@ class HealthPersistence:
             if existing:
                 error_id, first_seen, notif_sent, acknowledged, resolved_at = existing
 
-                # If acknowledged within last 24 hours, do not re-add unless resolved and re-occurred
-                if acknowledged and not resolved_at:
-                    first_seen_dt = datetime.fromisoformat(first_seen)
-                    if (datetime.now() - first_seen_dt).total_seconds() < 86400: # 24 hours
-                        # Skip re-adding recently acknowledged errors
-                        conn.close()
-                        return {'type': 'skipped', 'needs_notification': False}
+                if acknowledged == 1 and resolved_at is not None:
+                    # Check if acknowledged recently (within 24h)
+                    try:
+                        resolved_dt = datetime.fromisoformat(resolved_at)
+                        hours_since_ack = (datetime.now() - resolved_dt).total_seconds() / 3600
+
+                        if hours_since_ack < 24:
+                            # Skip re-adding recently acknowledged errors
+                            conn.close()
+                            return {'type': 'skipped_acknowledged', 'needs_notification': False}
+                    except Exception:
+                        pass
 
-                # Update existing error
+                # Update existing error (only if not acknowledged or >24h passed)
                 cursor.execute('''
                     UPDATE errors
-                    SET last_seen = ?, severity = ?, reason = ?, details = ?, resolved_at = NULL
+                    SET last_seen = ?, severity = ?, reason = ?, details = ?, resolved_at = NULL, acknowledged = 0
                     WHERE error_key = ?
                 ''', (now, severity, reason, details_json, error_key))
 
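The acknowledgment window above can be read as a small predicate; a sketch assuming resolved_at is stored as an ISO-8601 string, which the datetime.fromisoformat() call in the new code implies. The helper name and the explicit window_hours parameter are illustrative, not part of the source:

from datetime import datetime, timedelta

def recently_acknowledged(resolved_at, window_hours=24):
    """True if the error was acknowledged/resolved less than window_hours ago."""
    if not resolved_at:
        return False
    try:
        resolved_dt = datetime.fromisoformat(resolved_at)
    except ValueError:
        return False
    hours_since_ack = (datetime.now() - resolved_dt).total_seconds() / 3600
    return hours_since_ack < window_hours

# An error acknowledged 2 hours ago is skipped; one from 3 days ago is raised again.
print(recently_acknowledged((datetime.now() - timedelta(hours=2)).isoformat()))  # True
print(recently_acknowledged((datetime.now() - timedelta(days=3)).isoformat()))   # False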
@@ -123,6 +128,25 @@ class HealthPersistence:
                 event_info['type'] = 'escalated'
                 event_info['needs_notification'] = True
             else:
+                cursor.execute('''
+                    SELECT resolved_at, acknowledged FROM errors
+                    WHERE error_key = ? AND acknowledged = 1
+                    ORDER BY resolved_at DESC LIMIT 1
+                ''', (error_key,))
+                recent_ack = cursor.fetchone()
+
+                if recent_ack and recent_ack[0]:
+                    try:
+                        resolved_dt = datetime.fromisoformat(recent_ack[0])
+                        hours_since_ack = (datetime.now() - resolved_dt).total_seconds() / 3600
+
+                        if hours_since_ack < 24:
+                            # Don't re-add recently acknowledged errors
+                            conn.close()
+                            return {'type': 'skipped_acknowledged', 'needs_notification': False}
+                    except Exception:
+                        pass
+
                 # Insert new error
                 cursor.execute('''
                     INSERT INTO errors
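A runnable sketch of the pre-insert lookup above against a throwaway in-memory SQLite database. The errors table here is deliberately reduced to the three columns the query touches; the real schema has more fields (first_seen, severity, etc. per the surrounding hunks) and is not reproduced:

import sqlite3
from datetime import datetime, timedelta

conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE errors (error_key TEXT, resolved_at TEXT, acknowledged INTEGER)')
# Simulate an error that was acknowledged 3 hours ago
cursor.execute('INSERT INTO errors VALUES (?, ?, 1)',
               ('log_critical_I_O_error', (datetime.now() - timedelta(hours=3)).isoformat()))

error_key = 'log_critical_I_O_error'
cursor.execute('''
    SELECT resolved_at, acknowledged FROM errors
    WHERE error_key = ? AND acknowledged = 1
    ORDER BY resolved_at DESC LIMIT 1
''', (error_key,))
recent_ack = cursor.fetchone()

if recent_ack and recent_ack[0]:
    resolved_dt = datetime.fromisoformat(recent_ack[0])
    hours_since_ack = (datetime.now() - resolved_dt).total_seconds() / 3600
    if hours_since_ack < 24:
        print('skipped_acknowledged')  # stands in for the early return in the persistence layer
conn.close()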