mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-29 02:46:38 +00:00
Update notification service
This commit is contained in:
@@ -9,6 +9,7 @@ import { Label } from "./ui/label"
|
|||||||
import { Badge } from "./ui/badge"
|
import { Badge } from "./ui/badge"
|
||||||
import { Button } from "./ui/button"
|
import { Button } from "./ui/button"
|
||||||
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select"
|
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select"
|
||||||
|
import { Switch } from "./ui/switch"
|
||||||
import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogDescription } from "./ui/dialog"
|
import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogDescription } from "./ui/dialog"
|
||||||
import { fetchApi } from "../lib/api-config"
|
import { fetchApi } from "../lib/api-config"
|
||||||
import {
|
import {
|
||||||
|
|||||||
@@ -175,7 +175,7 @@ class HealthMonitor:
|
|||||||
r'proxmenux-monitor.*failed at step exec',
|
r'proxmenux-monitor.*failed at step exec',
|
||||||
r'proxmenux-monitor\.appimage',
|
r'proxmenux-monitor\.appimage',
|
||||||
|
|
||||||
# ─<EFBFBD><EFBFBD><EFBFBD> PVE scheduler operational noise ──
|
# ── PVE scheduler operational noise ──
|
||||||
# pvescheduler emits "could not update job state" every minute
|
# pvescheduler emits "could not update job state" every minute
|
||||||
# when a scheduled job reference is stale. This is cosmetic,
|
# when a scheduled job reference is stale. This is cosmetic,
|
||||||
# not a system problem.
|
# not a system problem.
|
||||||
@@ -2118,7 +2118,7 @@ class HealthMonitor:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# ── Record disk observation (always, even if transient) <EFBFBD><EFBFBD><EFBFBD>─
|
# ── Record disk observation (always, even if transient) ──
|
||||||
# Signature must be stable across cycles: strip volatile
|
# Signature must be stable across cycles: strip volatile
|
||||||
# data (hex values, counts, timestamps) to dedup properly.
|
# data (hex values, counts, timestamps) to dedup properly.
|
||||||
# e.g. "ata8.00: exception Emask 0x1 SAct 0xc1000000"
|
# e.g. "ata8.00: exception Emask 0x1 SAct 0xc1000000"
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ class _SharedState:
|
|||||||
_shared_state = _SharedState()
|
_shared_state = _SharedState()
|
||||||
|
|
||||||
|
|
||||||
# ─── Event Object ──────────────<EFBFBD><EFBFBD>──────────────────────────────────
|
# ─── Event Object ─────────────────────────────────────────────────
|
||||||
|
|
||||||
class NotificationEvent:
|
class NotificationEvent:
|
||||||
"""Represents a detected event ready for notification dispatch.
|
"""Represents a detected event ready for notification dispatch.
|
||||||
@@ -2154,6 +2154,9 @@ class PollingCollector:
|
|||||||
- Journal errors (for AI enrichment)
|
- Journal errors (for AI enrichment)
|
||||||
|
|
||||||
Emits a single "system_startup" notification with full report data.
|
Emits a single "system_startup" notification with full report data.
|
||||||
|
|
||||||
|
IMPORTANT: Only emits if this is a REAL system boot, not a service restart.
|
||||||
|
Checks system uptime to distinguish between the two cases.
|
||||||
"""
|
"""
|
||||||
# Wait until health grace period is over (5 min) for complete picture
|
# Wait until health grace period is over (5 min) for complete picture
|
||||||
if startup_grace.is_startup_health_grace():
|
if startup_grace.is_startup_health_grace():
|
||||||
@@ -2163,6 +2166,14 @@ class PollingCollector:
|
|||||||
if startup_grace.was_startup_aggregated():
|
if startup_grace.was_startup_aggregated():
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# CRITICAL: Check if this is a real system boot
|
||||||
|
# If the system was already running for > 10 min when service started,
|
||||||
|
# this is just a service restart, not a system boot - skip notification
|
||||||
|
if not startup_grace.is_real_system_boot():
|
||||||
|
# Mark as aggregated to prevent future checks, but don't send notification
|
||||||
|
startup_grace.mark_startup_aggregated()
|
||||||
|
return
|
||||||
|
|
||||||
# Collect comprehensive startup report
|
# Collect comprehensive startup report
|
||||||
report = startup_grace.collect_startup_report()
|
report = startup_grace.collect_startup_report()
|
||||||
|
|
||||||
@@ -2332,7 +2343,7 @@ class PollingCollector:
|
|||||||
for pkg in all_pkgs:
|
for pkg in all_pkgs:
|
||||||
if pkg['name'] in self._IMPORTANT_PKGS and pkg['cur']:
|
if pkg['name'] in self._IMPORTANT_PKGS and pkg['cur']:
|
||||||
important_lines.append(
|
important_lines.append(
|
||||||
f"{pkg['name']} ({pkg['cur']} -> {pkg['new']})"
|
f"{pkg['name']} ({pkg['cur']} → {pkg['new']})"
|
||||||
)
|
)
|
||||||
|
|
||||||
# ── Emit structured update_summary ─────────────────────
|
# ── Emit structured update_summary ─────────────────────
|
||||||
@@ -2358,7 +2369,7 @@ class PollingCollector:
|
|||||||
'current_version': pve_manager_info['cur'],
|
'current_version': pve_manager_info['cur'],
|
||||||
'new_version': pve_manager_info['new'],
|
'new_version': pve_manager_info['new'],
|
||||||
'version': pve_manager_info['new'],
|
'version': pve_manager_info['new'],
|
||||||
'details': f"pve-manager {pve_manager_info['cur']} -> {pve_manager_info['new']}",
|
'details': f"pve-manager {pve_manager_info['cur']} → {pve_manager_info['new']}",
|
||||||
}
|
}
|
||||||
self._queue.put(NotificationEvent(
|
self._queue.put(NotificationEvent(
|
||||||
'pve_update', 'INFO', pve_data,
|
'pve_update', 'INFO', pve_data,
|
||||||
|
|||||||
@@ -28,6 +28,22 @@ STARTUP_VM_GRACE_SECONDS = 180 # 3 minutes for VM/CT start aggregation
|
|||||||
STARTUP_HEALTH_GRACE_SECONDS = 300 # 5 minutes for health warning suppression
|
STARTUP_HEALTH_GRACE_SECONDS = 300 # 5 minutes for health warning suppression
|
||||||
SHUTDOWN_GRACE_SECONDS = 120 # 2 minutes for VM/CT stop suppression
|
SHUTDOWN_GRACE_SECONDS = 120 # 2 minutes for VM/CT stop suppression
|
||||||
|
|
||||||
|
# Maximum system uptime to consider this a real server boot (not just service restart)
|
||||||
|
# If system uptime > this value when service starts, skip startup notification
|
||||||
|
MAX_BOOT_UPTIME_SECONDS = 600 # 10 minutes - if system was up longer, it's a service restart
|
||||||
|
|
||||||
|
|
||||||
|
def _get_system_uptime() -> float:
|
||||||
|
"""
|
||||||
|
Get actual system uptime in seconds from /proc/uptime.
|
||||||
|
Returns 0 if unable to read (will default to treating as new boot).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with open('/proc/uptime', 'r') as f:
|
||||||
|
return float(f.readline().split()[0])
|
||||||
|
except Exception:
|
||||||
|
return 0
|
||||||
|
|
||||||
# Categories to suppress during startup grace period
|
# Categories to suppress during startup grace period
|
||||||
# These categories typically have transient issues during boot
|
# These categories typically have transient issues during boot
|
||||||
STARTUP_GRACE_CATEGORIES: Set[str] = {
|
STARTUP_GRACE_CATEGORIES: Set[str] = {
|
||||||
@@ -68,6 +84,11 @@ class _StartupGraceState:
|
|||||||
# Startup time = when service started (module load time)
|
# Startup time = when service started (module load time)
|
||||||
self._startup_time: float = time.time()
|
self._startup_time: float = time.time()
|
||||||
|
|
||||||
|
# Check if this is a REAL system boot or just a service restart
|
||||||
|
# by comparing system uptime to our threshold
|
||||||
|
system_uptime = _get_system_uptime()
|
||||||
|
self._is_real_boot: bool = system_uptime < MAX_BOOT_UPTIME_SECONDS
|
||||||
|
|
||||||
# Shutdown tracking
|
# Shutdown tracking
|
||||||
self._shutdown_time: float = 0
|
self._shutdown_time: float = 0
|
||||||
|
|
||||||
@@ -115,6 +136,19 @@ class _StartupGraceState:
|
|||||||
return self.is_startup_health_grace()
|
return self.is_startup_health_grace()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def is_real_system_boot(self) -> bool:
    """Report whether the service came up as part of a genuine system boot.

    The answer is the flag stored on the instance at construction time; it
    is read while holding the instance lock.

    Returns:
        True when the service start was classified as a real boot, False
        when the system had already been running for longer than the boot
        threshold (10 minutes), which indicates that only the ProxMenux
        Monitor service was restarted. Callers use this to avoid sending a
        "System startup completed" notification on a plain service restart.
    """
    self._lock.acquire()
    try:
        started_with_system = self._is_real_boot
    finally:
        # Always release, mirroring what a ``with self._lock`` block does.
        self._lock.release()
    return started_with_system
|
||||||
|
|
||||||
def get_startup_elapsed(self) -> float:
|
def get_startup_elapsed(self) -> float:
|
||||||
"""Get seconds elapsed since service startup."""
|
"""Get seconds elapsed since service startup."""
|
||||||
with self._lock:
|
with self._lock:
|
||||||
@@ -230,6 +264,19 @@ def was_startup_aggregated() -> bool:
|
|||||||
"""Check if startup aggregation has already been processed."""
|
"""Check if startup aggregation has already been processed."""
|
||||||
return _state.was_startup_aggregated()
|
return _state.was_startup_aggregated()
|
||||||
|
|
||||||
|
def is_real_system_boot() -> bool:
    """Tell a real system boot apart from a mere service restart.

    Thin module-level wrapper that delegates to the shared startup-grace
    state object.

    Returns:
        True if the system uptime was below 10 minutes when the service
        started; False if the system had already been running longer, which
        means only the service was restarted and not the whole system.

    Use this to suppress the "System startup completed" notification when
    the ProxMenux Monitor service alone is restarted.
    """
    real_boot = _state.is_real_system_boot()
    return real_boot
|
||||||
|
|
||||||
|
|
||||||
# ─── Startup Report Collection ───────────────────────────────────────────────
|
# ─── Startup Report Collection ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user