Update health monitor

This commit is contained in:
MacRimi
2026-02-16 22:53:16 +01:00
parent a1d48a28e9
commit 8004ee48c9
4 changed files with 62 additions and 6 deletions

View File

@@ -105,6 +105,8 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
setError(null)
try {
let newOverallStatus = "OK"
// Use the new combined endpoint for fewer round-trips
const response = await fetch(getApiUrl("/api/health/full"))
if (!response.ok) {
@@ -114,14 +116,17 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
const data = await legacyResponse.json()
setHealthData(data)
setDismissedItems([])
newOverallStatus = data?.overall || "OK"
} else {
const fullData: FullHealthData = await response.json()
setHealthData(fullData.health)
setDismissedItems(fullData.dismissed || [])
newOverallStatus = fullData.health?.overall || "OK"
}
// Emit event with the FRESH data from the response, not the stale state
const event = new CustomEvent("healthStatusUpdated", {
detail: { status: healthData?.overall || "OK" },
detail: { status: newOverallStatus },
})
window.dispatchEvent(event)
} catch (err) {
@@ -129,7 +134,7 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
} finally {
setLoading(false)
}
}, [getApiUrl, healthData?.overall])
}, [getApiUrl])
useEffect(() => {
if (open) {

View File

@@ -83,6 +83,12 @@ def acknowledge_error():
health_monitor.last_check_times.pop(cache_key, None)
health_monitor.cached_results.pop(cache_key, None)
# Also invalidate overall status caches so header updates immediately
health_monitor.last_check_times.pop('_bg_overall', None)
health_monitor.cached_results.pop('_bg_overall', None)
health_monitor.last_check_times.pop('overall_health', None)
health_monitor.cached_results.pop('overall_health', None)
# Determine suppression period for the response
category = result.get('category', '')
if category == 'updates':

View File

@@ -569,6 +569,34 @@ def _temperature_collector_loop():
time.sleep(60)
def _health_collector_loop():
    """Background thread body: periodically refresh the overall health cache.

    Sleeps 30 seconds once at startup, then loops forever: it calls
    ``health_monitor.get_detailed_status()``, stores the reported overall
    status and summary under the ``'_bg_overall'`` key of
    ``health_monitor.cached_results``, stamps the matching entry in
    ``health_monitor.last_check_times``, and sleeps 5 minutes before the
    next pass.

    Any exception raised during a pass is printed and the loop carries on,
    so a single failing check does not stop the collector. This function
    never returns.
    """
    from health_monitor import health_monitor

    # Give the rest of the application 30 seconds to initialize first.
    time.sleep(30)

    while True:
        try:
            # Full check; per the original author's note this call also
            # caches its results internally and records events in the DB.
            detailed = health_monitor.get_detailed_status()

            # Publish a compact snapshot for the quick-status (header) path.
            snapshot = {
                'status': detailed.get('overall', 'OK'),
                'summary': detailed.get('summary', 'All systems operational'),
            }
            health_monitor.cached_results['_bg_overall'] = snapshot
            health_monitor.last_check_times['_bg_overall'] = time.time()
        except Exception as exc:
            print(f"[ProxMenux] Health collector error: {exc}")

        # Wait 5 minutes between passes, whether or not the pass succeeded.
        time.sleep(300)
def get_uptime():
"""Get system uptime in a human-readable format."""
try:
@@ -7006,6 +7034,15 @@ if __name__ == '__main__':
else:
print("[ProxMenux] Temperature history disabled (DB init failed)")
# ── Background Health Monitor ──
# Run full health checks every 5 min, keeping cache fresh and recording events for notifications
try:
health_thread = threading.Thread(target=_health_collector_loop, daemon=True)
health_thread.start()
print("[ProxMenux] Background health monitor started (5 min interval)")
except Exception as e:
print(f"[ProxMenux] Background health monitor failed to start: {e}")
# Check for SSL configuration
ssl_ctx = None
try:

View File

@@ -201,17 +201,25 @@ class HealthMonitor:
def get_cached_health_status(self) -> Dict[str, str]:
"""
Get cached health status without running expensive checks.
Returns the last calculated status or triggers a check if too old.
The background health collector keeps '_bg_overall' always fresh (every 5 min).
Falls back to calculating on demand if background data is stale or unavailable.
"""
cache_key = 'overall_health'
current_time = time.time()
# If cache exists and is less than 60 seconds old, return it
# 1. Check background collector cache (updated every 5 min by _health_collector_loop)
bg_key = '_bg_overall'
if bg_key in self.last_check_times:
age = current_time - self.last_check_times[bg_key]
if age < 360: # 6 min (5 min interval + 1 min tolerance)
return self.cached_results.get(bg_key, {'status': 'OK', 'summary': 'System operational'})
# 2. Check regular cache (updated by modal fetches or on-demand)
cache_key = 'overall_health'
if cache_key in self.last_check_times:
if current_time - self.last_check_times[cache_key] < 60:
return self.cached_results.get(cache_key, {'status': 'OK', 'summary': 'System operational'})
# Otherwise, calculate and cache
# 3. No fresh cache - calculate on demand (happens only on first load before bg thread runs)
status = self.get_overall_status()
self.cached_results[cache_key] = {
'status': status['status'],