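Update health_persistence.py: treat disk_observations as permanent history (no startup auto-dismiss, no longer a Health Monitor warning source)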

This commit is contained in:
MacRimi
2026-04-17 10:38:39 +02:00
parent 4b6a91e74c
commit baa2ff4fa9
4 changed files with 28 additions and 65 deletions

View File

@@ -1462,38 +1462,15 @@ class HealthMonitor:
except Exception:
pass
# Check disk_observations for active (non-dismissed) warnings
# This ensures disks with persistent observations appear in Health Monitor
# even if the error is not currently in the logs
try:
all_observations = health_persistence.get_disk_observations()
for obs in all_observations:
device_name = obs.get('device_name', '').replace('/dev/', '')
if not device_name:
continue
severity = (obs.get('severity') or 'warning').upper()
# Only include if WARNING/CRITICAL and not already dismissed
if severity in ('WARNING', 'CRITICAL') and not obs.get('dismissed'):
# Check if there's a corresponding acknowledged error in the errors table
# If so, skip this observation (it was dismissed via Health Monitor)
error_key = f"disk_smart_{device_name}"
error_record = health_persistence.get_error_by_key(error_key)
if error_record and error_record.get('acknowledged'):
continue # Skip - this was dismissed
# Add to disk_errors_by_device if not already present
if device_name not in disk_errors_by_device:
obs_reason = obs.get('raw_message', f'{device_name}: Disk observation recorded')
disk_errors_by_device[device_name] = {
'status': severity,
'reason': obs_reason,
'error_type': obs.get('error_type', 'disk_observation'),
'serial': obs.get('serial', ''),
'model': obs.get('model', ''),
'dismissable': True,
}
except Exception:
pass
# NOTE: disk_observations is the PERMANENT historical record of disk events
# and must NOT be used as a source for Health Monitor warnings.
# Only the `errors` table (active alerts) drives the Health Monitor view.
# Observations are visible separately in the disk detail UI, where users
# can review the full history and dismiss individual entries if desired.
#
# Previous behavior read disk_observations here and created phantom warnings
# that persisted even after the underlying error was gone — conflating the
# permanent history with the current health state.
# Add consolidated disk entries (only for disks with errors)
for device_name, error_info in disk_errors_by_device.items():

View File

@@ -343,6 +343,12 @@ class HealthPersistence:
# re-processed every cycle, causing infinite notification loops.
# On upgrade, clean up any stale errors that are stuck in the
# active state from the old buggy behavior.
#
# IMPORTANT: Only cleans the `errors` table (health monitor state).
# The `disk_observations` table is a PERMANENT historical record
# and must NEVER be auto-modified on startup. Users dismiss
# observations manually from the disk detail UI.
#
# Covers: disk I/O (smart_*, disk_*), VM/CT (vm_*, ct_*, vmct_*),
# and log errors (log_*) — all journal-sourced categories.
try:
@@ -363,26 +369,9 @@ class HealthPersistence:
''', (cutoff,))
cleaned_errors = cursor.rowcount
# Also dismiss stale disk observations that are still active
# but haven't been updated recently — leftovers from the
# feedback loop bug where occurrence_count kept incrementing.
# Detect column names for backward compatibility
cursor.execute('PRAGMA table_info(disk_observations)')
obs_cols = [col[1] for col in cursor.fetchall()]
last_col = 'last_occurrence' if 'last_occurrence' in obs_cols else 'last_seen'
cursor.execute(f'''
UPDATE disk_observations
SET dismissed = 1
WHERE dismissed = 0
AND {last_col} < ?
''', (cutoff,))
cleaned_obs = cursor.rowcount
total_cleaned = cleaned_errors + cleaned_obs
if total_cleaned > 0:
if cleaned_errors > 0:
conn.commit()
print(f"[HealthPersistence] Startup cleanup: removed {cleaned_errors} stale error(s), dismissed {cleaned_obs} stale observation(s)")
print(f"[HealthPersistence] Startup cleanup: removed {cleaned_errors} stale error(s) from health monitor")
except Exception as e:
print(f"[HealthPersistence] Startup cleanup warning: {e}")