mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-30 11:26:23 +00:00
Update notification service
This commit is contained in:
@@ -90,7 +90,7 @@ cp "$SCRIPT_DIR/hardware_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "
|
|||||||
cp "$SCRIPT_DIR/proxmox_storage_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ proxmox_storage_monitor.py not found"
|
cp "$SCRIPT_DIR/proxmox_storage_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ proxmox_storage_monitor.py not found"
|
||||||
cp "$SCRIPT_DIR/flask_script_runner.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_script_runner.py not found"
|
cp "$SCRIPT_DIR/flask_script_runner.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_script_runner.py not found"
|
||||||
cp "$SCRIPT_DIR/security_manager.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ security_manager.py not found"
|
cp "$SCRIPT_DIR/security_manager.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ security_manager.py not found"
|
||||||
cp "$SCRIPT_DIR/flask_security_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠<EFBFBD><EFBFBD><EFBFBD> flask_security_routes.py not found"
|
cp "$SCRIPT_DIR/flask_security_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_security_routes.py not found"
|
||||||
cp "$SCRIPT_DIR/notification_manager.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ notification_manager.py not found"
|
cp "$SCRIPT_DIR/notification_manager.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ notification_manager.py not found"
|
||||||
cp "$SCRIPT_DIR/notification_channels.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ notification_channels.py not found"
|
cp "$SCRIPT_DIR/notification_channels.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ notification_channels.py not found"
|
||||||
cp "$SCRIPT_DIR/notification_templates.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ notification_templates.py not found"
|
cp "$SCRIPT_DIR/notification_templates.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ notification_templates.py not found"
|
||||||
|
|||||||
@@ -33,6 +33,19 @@ except ImportError:
|
|||||||
# ============================================================================
|
# ============================================================================
|
||||||
DEBUG_PERF = False
|
DEBUG_PERF = False
|
||||||
|
|
||||||
|
# Startup grace period: suppress transient issues during boot
|
||||||
|
# This is set when the module loads (service start)
|
||||||
|
_MODULE_START_TIME = time.time()
|
||||||
|
_STARTUP_HEALTH_GRACE_SECONDS = 300 # 5 minutes
|
||||||
|
|
||||||
|
def _is_startup_health_grace() -> bool:
|
||||||
|
"""Check if we're within the startup health grace period (5 min).
|
||||||
|
|
||||||
|
Used to downgrade transient errors (high latency, storage not ready)
|
||||||
|
to INFO level during system boot, preventing false CRITICAL alerts.
|
||||||
|
"""
|
||||||
|
return (time.time() - _MODULE_START_TIME) < _STARTUP_HEALTH_GRACE_SECONDS
|
||||||
|
|
||||||
def _perf_log(section: str, elapsed_ms: float):
|
def _perf_log(section: str, elapsed_ms: float):
|
||||||
"""Log performance timing for a section. Only logs if DEBUG_PERF is True."""
|
"""Log performance timing for a section. Only logs if DEBUG_PERF is True."""
|
||||||
if DEBUG_PERF:
|
if DEBUG_PERF:
|
||||||
@@ -2512,12 +2525,24 @@ class HealthMonitor:
|
|||||||
return loss_result
|
return loss_result
|
||||||
|
|
||||||
# Evaluate latency thresholds
|
# Evaluate latency thresholds
|
||||||
|
# During startup grace period, downgrade CRITICAL/WARNING to INFO
|
||||||
|
# to avoid false alerts from transient boot-time latency spikes
|
||||||
|
in_grace_period = _is_startup_health_grace()
|
||||||
|
|
||||||
if avg_latency > self.NETWORK_LATENCY_CRITICAL:
|
if avg_latency > self.NETWORK_LATENCY_CRITICAL:
|
||||||
status = 'CRITICAL'
|
if in_grace_period:
|
||||||
reason = f'Latency {avg_latency:.1f}ms to gateway >{self.NETWORK_LATENCY_CRITICAL}ms'
|
status = 'INFO'
|
||||||
|
reason = f'Latency {avg_latency:.1f}ms (startup grace, will stabilize)'
|
||||||
|
else:
|
||||||
|
status = 'CRITICAL'
|
||||||
|
reason = f'Latency {avg_latency:.1f}ms to gateway >{self.NETWORK_LATENCY_CRITICAL}ms'
|
||||||
elif avg_latency > self.NETWORK_LATENCY_WARNING:
|
elif avg_latency > self.NETWORK_LATENCY_WARNING:
|
||||||
status = 'WARNING'
|
if in_grace_period:
|
||||||
reason = f'Latency {avg_latency:.1f}ms to gateway >{self.NETWORK_LATENCY_WARNING}ms'
|
status = 'INFO'
|
||||||
|
reason = f'Latency {avg_latency:.1f}ms (startup grace, will stabilize)'
|
||||||
|
else:
|
||||||
|
status = 'WARNING'
|
||||||
|
reason = f'Latency {avg_latency:.1f}ms to gateway >{self.NETWORK_LATENCY_WARNING}ms'
|
||||||
else:
|
else:
|
||||||
status = 'OK'
|
status = 'OK'
|
||||||
reason = None
|
reason = None
|
||||||
|
|||||||
@@ -221,7 +221,7 @@ def capture_journal_context(keywords: list, lines: int = 30,
|
|||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
# ─── Journal Watcher (Real-time) ─────────────────────────────────
|
# ─── Journal Watcher (Real-time) ─────────────────────────────────
|
||||||
|
|
||||||
class JournalWatcher:
|
class JournalWatcher:
|
||||||
"""Watches journald in real-time for critical system events.
|
"""Watches journald in real-time for critical system events.
|
||||||
@@ -1640,13 +1640,9 @@ class TaskWatcher:
|
|||||||
# let PollingCollector emit one "System startup: X VMs, Y CTs started".
|
# let PollingCollector emit one "System startup: X VMs, Y CTs started".
|
||||||
_STARTUP_EVENTS = {'vm_start', 'ct_start'}
|
_STARTUP_EVENTS = {'vm_start', 'ct_start'}
|
||||||
if event_type in _STARTUP_EVENTS and not is_error:
|
if event_type in _STARTUP_EVENTS and not is_error:
|
||||||
is_startup = _shared_state.is_startup_period()
|
if _shared_state.is_startup_period():
|
||||||
elapsed = time.time() - _shared_state._startup_time
|
|
||||||
print(f"[TaskWatcher] {event_type} for {vmid}: is_startup_period={is_startup}, elapsed={elapsed:.1f}s")
|
|
||||||
if is_startup:
|
|
||||||
vm_type = 'ct' if event_type == 'ct_start' else 'vm'
|
vm_type = 'ct' if event_type == 'ct_start' else 'vm'
|
||||||
_shared_state.add_startup_vm(vmid, vmname or f'ID {vmid}', vm_type)
|
_shared_state.add_startup_vm(vmid, vmname or f'ID {vmid}', vm_type)
|
||||||
print(f"[TaskWatcher] Aggregated {event_type} for {vmid}, total pending: {len(_shared_state._startup_vms)}")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
self._queue.put(NotificationEvent(
|
self._queue.put(NotificationEvent(
|
||||||
@@ -2189,16 +2185,11 @@ class PollingCollector:
|
|||||||
if _shared_state.was_startup_aggregated():
|
if _shared_state.was_startup_aggregated():
|
||||||
return
|
return
|
||||||
|
|
||||||
print(f"[PollingCollector] Startup period ended, checking for aggregated VMs...")
|
|
||||||
|
|
||||||
# Get all collected startup VMs/CTs
|
# Get all collected startup VMs/CTs
|
||||||
startup_items = _shared_state.get_and_clear_startup_vms()
|
startup_items = _shared_state.get_and_clear_startup_vms()
|
||||||
if not startup_items:
|
if not startup_items:
|
||||||
print(f"[PollingCollector] No VMs/CTs collected during startup period")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
print(f"[PollingCollector] Emitting aggregated startup notification for {len(startup_items)} items")
|
|
||||||
|
|
||||||
# Count VMs and CTs
|
# Count VMs and CTs
|
||||||
vms = [(vmid, name) for vmid, name, vtype in startup_items if vtype == 'vm']
|
vms = [(vmid, name) for vmid, name, vtype in startup_items if vtype == 'vm']
|
||||||
cts = [(vmid, name) for vmid, name, vtype in startup_items if vtype == 'ct']
|
cts = [(vmid, name) for vmid, name, vtype in startup_items if vtype == 'ct']
|
||||||
@@ -2289,7 +2280,7 @@ class PollingCollector:
|
|||||||
if total == 0:
|
if total == 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
# ── Parse every Inst line ────────────────<EFBFBD><EFBFBD>─────────────
|
# ── Parse every Inst line ──────────────────────────────
|
||||||
all_pkgs: list[dict] = [] # {name, cur, new}
|
all_pkgs: list[dict] = [] # {name, cur, new}
|
||||||
security_pkgs: list[dict] = []
|
security_pkgs: list[dict] = []
|
||||||
pve_pkgs: list[dict] = []
|
pve_pkgs: list[dict] = []
|
||||||
|
|||||||
Reference in New Issue
Block a user