mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-27 09:56:24 +00:00
update notification_events.py
This commit is contained in:
@@ -29,6 +29,17 @@ export default function Home() {
|
||||
const response = await fetch(getApiUrl("/api/auth/status"), {
|
||||
headers: token ? { Authorization: `Bearer ${token}` } : {},
|
||||
})
|
||||
|
||||
// Check if response is valid JSON before parsing
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}`)
|
||||
}
|
||||
|
||||
const contentType = response.headers.get("content-type")
|
||||
if (!contentType || !contentType.includes("application/json")) {
|
||||
throw new Error("Response is not JSON")
|
||||
}
|
||||
|
||||
const data = await response.json()
|
||||
|
||||
const authenticated = data.auth_enabled ? data.authenticated : true
|
||||
@@ -39,8 +50,8 @@ export default function Home() {
|
||||
authConfigured: data.auth_configured,
|
||||
authenticated,
|
||||
})
|
||||
} catch (error) {
|
||||
console.error("Failed to check auth status:", error)
|
||||
} catch {
|
||||
// API not available - assume no auth configured (silent fail, no console error)
|
||||
setAuthStatus({
|
||||
loading: false,
|
||||
authEnabled: false,
|
||||
|
||||
@@ -27,18 +27,26 @@ export function AuthSetup({ onComplete }: AuthSetupProps) {
|
||||
const checkOnboardingStatus = async () => {
|
||||
try {
|
||||
const response = await fetch(getApiUrl("/api/auth/status"))
|
||||
|
||||
// Check if response is valid JSON before parsing
|
||||
if (!response.ok) {
|
||||
// API not available - don't show modal in preview
|
||||
return
|
||||
}
|
||||
|
||||
const contentType = response.headers.get("content-type")
|
||||
if (!contentType || !contentType.includes("application/json")) {
|
||||
return
|
||||
}
|
||||
|
||||
const data = await response.json()
|
||||
|
||||
console.log("[v0] Auth status for modal check:", data)
|
||||
|
||||
// Show modal if auth is not configured and not declined
|
||||
if (!data.auth_configured) {
|
||||
setTimeout(() => setOpen(true), 500)
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("[v0] Failed to check auth status:", error)
|
||||
// Fail-safe: show modal if we can't check status
|
||||
setTimeout(() => setOpen(true), 500)
|
||||
} catch {
|
||||
// API not available (preview environment) - don't show modal
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -299,6 +299,19 @@ export function NotificationSettings() {
|
||||
fallback_commands: string[]
|
||||
error: string
|
||||
}>({ status: "idle", fallback_commands: [], error: "" })
|
||||
const [systemHostname, setSystemHostname] = useState<string>("")
|
||||
|
||||
// Load system hostname for display name placeholder
|
||||
const loadSystemHostname = useCallback(async () => {
|
||||
try {
|
||||
const data = await fetchApi<{ hostname?: string }>("/api/system")
|
||||
if (data.hostname) {
|
||||
setSystemHostname(data.hostname)
|
||||
}
|
||||
} catch {
|
||||
// Ignore - will show generic placeholder
|
||||
}
|
||||
}, [])
|
||||
|
||||
const loadConfig = useCallback(async () => {
|
||||
try {
|
||||
@@ -366,7 +379,8 @@ export function NotificationSettings() {
|
||||
useEffect(() => {
|
||||
loadConfig()
|
||||
loadStatus()
|
||||
}, [loadConfig, loadStatus])
|
||||
loadSystemHostname()
|
||||
}, [loadConfig, loadStatus, loadSystemHostname])
|
||||
|
||||
useEffect(() => {
|
||||
if (showHistory) loadHistory()
|
||||
@@ -1505,6 +1519,25 @@ export function NotificationSettings() {
|
||||
</div>{/* close bordered channel container */}
|
||||
</div>
|
||||
|
||||
{/* ── Display Name ── */}
|
||||
<div className="space-y-2 pb-3 border-b border-border/50">
|
||||
<div className="flex items-center gap-2">
|
||||
<Server className="h-4 w-4 text-blue-400" />
|
||||
<Label className="text-xs sm:text-sm text-foreground/80">Display Name</Label>
|
||||
</div>
|
||||
<Input
|
||||
className={`h-9 text-sm ${!editMode ? "opacity-50 cursor-not-allowed" : ""}`}
|
||||
placeholder={systemHostname || "System hostname"}
|
||||
value={config.hostname || (editMode ? "" : systemHostname)}
|
||||
onChange={e => updateConfig(p => ({ ...p, hostname: e.target.value }))}
|
||||
disabled={!editMode}
|
||||
readOnly={!editMode}
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Name shown in notifications. Edit to customize, or leave empty to use the system hostname.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* ── Advanced: AI Enhancement ── */}
|
||||
<div>
|
||||
<div className="flex items-center justify-between py-1">
|
||||
|
||||
@@ -641,11 +641,18 @@ export function Security() {
|
||||
const checkAuthStatus = async () => {
|
||||
try {
|
||||
const response = await fetch(getApiUrl("/api/auth/status"))
|
||||
|
||||
// Check if response is valid JSON before parsing
|
||||
if (!response.ok) return
|
||||
|
||||
const contentType = response.headers.get("content-type")
|
||||
if (!contentType || !contentType.includes("application/json")) return
|
||||
|
||||
const data = await response.json()
|
||||
setAuthEnabled(data.auth_enabled || false)
|
||||
setTotpEnabled(data.totp_enabled || false)
|
||||
} catch (err) {
|
||||
console.error("Failed to check auth status:", err)
|
||||
} catch {
|
||||
// API not available (preview environment)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -181,6 +181,20 @@ class HealthMonitor:
|
||||
# not a system problem.
|
||||
r'pvescheduler.*could not update job state',
|
||||
r'pvescheduler.*no such task',
|
||||
|
||||
# ── GPU passthrough / vfio operational noise ──
|
||||
# When a GPU is passed through to a VM using vfio-pci, the host
|
||||
# NVIDIA driver will log errors because it cannot access the GPU.
|
||||
# This is expected behavior, NOT an error - the passthrough is working.
|
||||
r'NVRM.*GPU.*already bound to vfio-pci',
|
||||
r'NVRM.*GPU.*is not supported',
|
||||
r'NVRM.*failed to enable MSI',
|
||||
r'NVRM.*RmInitAdapter failed',
|
||||
r'NVRM.*rm_init_adapter failed',
|
||||
r'nvidia.*probe.*failed',
|
||||
r'vfio-pci.*\d+:\d+:\d+\.\d+.*reset',
|
||||
r'vfio-pci.*enabling device',
|
||||
r'vfio_pci.*cannot assign irq',
|
||||
]
|
||||
|
||||
CRITICAL_LOG_KEYWORDS = [
|
||||
@@ -745,7 +759,13 @@ class HealthMonitor:
|
||||
}
|
||||
|
||||
def _check_cpu_with_hysteresis(self) -> Dict[str, Any]:
|
||||
"""Check CPU with hysteresis to avoid flapping alerts - requires 5min sustained high usage"""
|
||||
"""Check CPU with hysteresis to avoid flapping alerts - requires sustained high usage.
|
||||
|
||||
With samples every ~10 seconds:
|
||||
- CRITICAL: 30 samples >= 95% in 300s window = 5 min sustained
|
||||
- WARNING: 30 samples >= 85% in 300s window = 5 min sustained
|
||||
- RECOVERY: 12 samples < 75% in 120s window = 2 min below threshold
|
||||
"""
|
||||
try:
|
||||
cpu_percent = psutil.cpu_percent(interval=0.1) # 100ms sample - sufficient for health check
|
||||
current_time = time.time()
|
||||
@@ -765,6 +785,7 @@ class HealthMonitor:
|
||||
if current_time - entry['time'] < 360
|
||||
]
|
||||
|
||||
# Count samples in the monitoring windows
|
||||
critical_samples = [
|
||||
entry for entry in self.state_history[state_key]
|
||||
if entry['value'] >= self.CPU_CRITICAL and
|
||||
@@ -783,27 +804,39 @@ class HealthMonitor:
|
||||
current_time - entry['time'] <= self.CPU_RECOVERY_DURATION
|
||||
]
|
||||
|
||||
if len(critical_samples) >= 3:
|
||||
# Require enough samples to cover the sustained period
|
||||
# With ~10s sampling interval: 300s = ~30 samples, 120s = ~12 samples
|
||||
# Using slightly lower thresholds to account for timing variations
|
||||
CRITICAL_MIN_SAMPLES = 25 # ~250s of sustained high CPU
|
||||
WARNING_MIN_SAMPLES = 25 # ~250s of sustained elevated CPU
|
||||
RECOVERY_MIN_SAMPLES = 10 # ~100s of recovery
|
||||
|
||||
if len(critical_samples) >= CRITICAL_MIN_SAMPLES:
|
||||
# Calculate actual duration from oldest to newest sample
|
||||
oldest = min(s['time'] for s in critical_samples)
|
||||
actual_duration = int(current_time - oldest)
|
||||
status = 'CRITICAL'
|
||||
reason = f'CPU >{self.CPU_CRITICAL}% sustained for {self.CPU_CRITICAL_DURATION}s'
|
||||
reason = f'CPU >{self.CPU_CRITICAL}% sustained for {actual_duration}s'
|
||||
# Record the error
|
||||
health_persistence.record_error(
|
||||
error_key='cpu_usage',
|
||||
category='cpu',
|
||||
severity='CRITICAL',
|
||||
reason=reason,
|
||||
details={'cpu_percent': cpu_percent}
|
||||
details={'cpu_percent': cpu_percent, 'duration': actual_duration}
|
||||
)
|
||||
elif len(warning_samples) >= 3 and len(recovery_samples) < 2:
|
||||
elif len(warning_samples) >= WARNING_MIN_SAMPLES and len(recovery_samples) < RECOVERY_MIN_SAMPLES:
|
||||
oldest = min(s['time'] for s in warning_samples)
|
||||
actual_duration = int(current_time - oldest)
|
||||
status = 'WARNING'
|
||||
reason = f'CPU >{self.CPU_WARNING}% sustained for {self.CPU_WARNING_DURATION}s'
|
||||
reason = f'CPU >{self.CPU_WARNING}% sustained for {actual_duration}s'
|
||||
# Record the warning
|
||||
health_persistence.record_error(
|
||||
error_key='cpu_usage',
|
||||
category='cpu',
|
||||
severity='WARNING',
|
||||
reason=reason,
|
||||
details={'cpu_percent': cpu_percent}
|
||||
details={'cpu_percent': cpu_percent, 'duration': actual_duration}
|
||||
)
|
||||
else:
|
||||
status = 'OK'
|
||||
@@ -921,9 +954,15 @@ class HealthMonitor:
|
||||
|
||||
# Require at least 18 samples over 3 minutes (one every 10 seconds) to trigger alert
|
||||
if len(high_temp_samples) >= 18:
|
||||
# Temperature has been >80°C for >3 minutes
|
||||
# Temperature has been >80°C for >3 minutes - calculate actual duration
|
||||
oldest = min(s['time'] for s in high_temp_samples)
|
||||
actual_duration = int(current_time - oldest)
|
||||
actual_minutes = actual_duration // 60
|
||||
actual_seconds = actual_duration % 60
|
||||
duration_str = f'{actual_minutes}m {actual_seconds}s' if actual_minutes > 0 else f'{actual_seconds}s'
|
||||
|
||||
status = 'WARNING'
|
||||
reason = f'CPU temperature {max_temp}°C >80°C sustained >3min'
|
||||
reason = f'CPU temperature {max_temp}°C >80°C sustained for {duration_str}'
|
||||
|
||||
# Record non-dismissable error
|
||||
health_persistence.record_error(
|
||||
@@ -931,7 +970,7 @@ class HealthMonitor:
|
||||
category='temperature',
|
||||
severity='WARNING',
|
||||
reason=reason,
|
||||
details={'temperature': max_temp, 'dismissable': False}
|
||||
details={'temperature': max_temp, 'duration': actual_duration, 'dismissable': False}
|
||||
)
|
||||
elif len(recovery_samples) >= 3:
|
||||
# Temperature has been ≤80°C for 30 seconds - clear the error
|
||||
|
||||
@@ -137,6 +137,30 @@ class NotificationEvent:
|
||||
|
||||
|
||||
def _hostname() -> str:
|
||||
"""Get display hostname for notifications.
|
||||
|
||||
Returns the custom display name from notification settings if configured,
|
||||
otherwise falls back to the system hostname.
|
||||
"""
|
||||
# Try to read custom display name from notification settings
|
||||
try:
|
||||
db_path = Path('/usr/local/share/proxmenux/health_monitor.db')
|
||||
if db_path.exists():
|
||||
conn = sqlite3.connect(str(db_path), timeout=5)
|
||||
conn.execute('PRAGMA busy_timeout=3000')
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT setting_value FROM user_settings WHERE setting_key = ?",
|
||||
('notification.hostname',)
|
||||
)
|
||||
row = cursor.fetchone()
|
||||
conn.close()
|
||||
if row and row[0] and row[0].strip():
|
||||
return row[0].strip()
|
||||
except Exception:
|
||||
pass # Fall back to system hostname
|
||||
|
||||
# Fall back to system hostname
|
||||
try:
|
||||
return socket.gethostname().split('.')[0]
|
||||
except Exception:
|
||||
|
||||
Reference in New Issue
Block a user