diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 91bed5e..2344df4 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,29 +1,29 @@ --- name: Bug Report -about: Reporta un problema en el proyecto -title: "[BUG] Describe el problema" +about: Report a problem in the project +title: "[BUG] Describe the issue" labels: bug assignees: 'MacRimi' --- -## Descripción -Describe el error de forma clara y concisa. +## Description +Describe the bug clearly and concisely. -## Pasos para reproducir +## Steps to Reproduce 1. ... 2. ... 3. ... -## Comportamiento esperado -¿Qué debería ocurrir? +## Expected Behavior +What should happen? -## Capturas de pantalla (Obligatorio) -Agrega imágenes para ayudar a entender el problema. +## Screenshots (Required) +Add images to help illustrate the issue. -## Entorno -- Sistema operativo: -- Versión del software: -- Otros detalles relevantes: +## Environment +- Operating system: +- Software version: +- Other relevant details: -## Información adicional -Agrega cualquier otro contexto sobre el problema aquí. +## Additional Information +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 9354ec1..28f1696 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -2,4 +2,4 @@ blank_issues_enabled: false contact_links: - name: Soporte General url: https://github.com/MacRimi/ProxMenux/discussions - about: Si tu solicitud no es un bug ni un feature, usa las discusiones. + about: If your request is neither a bug nor a feature, please use Discussions. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 68dd603..1243b09 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,19 +1,19 @@ --- name: Feature Request -about: Sugiere una nueva funcionalidad o mejora -title: "[FEATURE] Describe la propuesta" +about: Suggest a new feature or improvement +title: "[FEATURE] Describe your proposal" labels: enhancement assignees: 'MacRimi' --- -## Descripción -Explica la funcionalidad que propones. +## Description +Explain the feature you are proposing. -## Motivación -¿Por qué es importante esta mejora? ¿Qué problema resuelve? +## Motivation +Why is this improvement important? What problem does it solve? -## Alternativas consideradas -¿Hay otras soluciones que hayas pensado? +## Alternatives Considered +Are there other solutions you have thought about? -## Información adicional -Agrega cualquier detalle extra que ayude a entender la propuesta. +## Additional Information +Add any extra details that help understand your proposal. 
diff --git a/AppImage/app/page.tsx b/AppImage/app/page.tsx index 4184221..c4f3736 100644 --- a/AppImage/app/page.tsx +++ b/AppImage/app/page.tsx @@ -1,7 +1,85 @@ "use client" +import { useState, useEffect } from "react" import { ProxmoxDashboard } from "../components/proxmox-dashboard" +import { Login } from "../components/login" +import { AuthSetup } from "../components/auth-setup" +import { getApiUrl } from "../lib/api-config" export default function Home() { - return + const [authStatus, setAuthStatus] = useState<{ + loading: boolean + authEnabled: boolean + authConfigured: boolean + authenticated: boolean + }>({ + loading: true, + authEnabled: false, + authConfigured: false, + authenticated: false, + }) + + useEffect(() => { + checkAuthStatus() + }, []) + + const checkAuthStatus = async () => { + try { + const token = localStorage.getItem("proxmenux-auth-token") + const response = await fetch(getApiUrl("/api/auth/status"), { + headers: token ? { Authorization: `Bearer ${token}` } : {}, + }) + const data = await response.json() + + console.log("[v0] Auth status:", data) + + const authenticated = data.auth_enabled ? data.authenticated : true + + setAuthStatus({ + loading: false, + authEnabled: data.auth_enabled, + authConfigured: data.auth_configured, + authenticated, + }) + } catch (error) { + console.error("[v0] Failed to check auth status:", error) + setAuthStatus({ + loading: false, + authEnabled: false, + authConfigured: false, + authenticated: true, + }) + } + } + + const handleAuthComplete = () => { + checkAuthStatus() + } + + const handleLoginSuccess = () => { + checkAuthStatus() + } + + if (authStatus.loading) { + return ( +
+
+
+

Loading...

+
+
+ ) + } + + if (authStatus.authEnabled && !authStatus.authenticated) { + return + } + + // Show dashboard in all other cases + return ( + <> + {!authStatus.authConfigured && } + + + ) } diff --git a/AppImage/components/auth-setup.tsx b/AppImage/components/auth-setup.tsx index fd269c1..672cf11 100644 --- a/AppImage/components/auth-setup.tsx +++ b/AppImage/components/auth-setup.tsx @@ -129,15 +129,15 @@ export function AuthSetup({ onComplete }: AuthSetupProps) { return ( - + {step === "choice" ? ( -
+

Protect Your Dashboard?

-

+

Add an extra layer of security to protect your Proxmox data when accessing from non-private networks.

@@ -161,13 +161,13 @@ export function AuthSetup({ onComplete }: AuthSetupProps) {

You can always enable this later in Settings

) : ( -
+

Setup Authentication

-

Create a username and password to protect your dashboard

+

Create a username and password to protect your dashboard

{error && ( @@ -179,7 +179,9 @@ export function AuthSetup({ onComplete }: AuthSetupProps) {
- +
setUsername(e.target.value)} - className="pl-10" + className="pl-10 text-base" disabled={loading} + autoComplete="username" />
- +
setPassword(e.target.value)} - className="pl-10" + className="pl-10 text-base" disabled={loading} + autoComplete="new-password" />
- +
setConfirmPassword(e.target.value)} - className="pl-10" + className="pl-10 text-base" disabled={loading} + autoComplete="new-password" />
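For reference, everything this setup dialog collects is persisted by auth_manager.py (further down in this diff) into a small JSON config file. A minimal sketch of the on-disk shape follows — the path and field names come from CONFIG_DIR / AUTH_CONFIG_FILE and the load_auth_config() docstring below, while the concrete values and the direct write are illustrative assumptions, not what the setup endpoint literally does:

import json
from pathlib import Path

# Hypothetical contents of ~/.config/proxmenux-monitor/auth.json after setup;
# field names mirror load_auth_config() in auth_manager.py, values are invented.
config = {
    "enabled": True,
    "username": "admin",                # example value, not a project default
    "password_hash": "<password-hash>",  # produced by hash_password() (scheme not shown here)
    "declined": False,
    "configured": True,
    "totp_enabled": False,
    "totp_secret": None,
    "backup_codes": [],
}

path = Path.home() / ".config" / "proxmenux-monitor" / "auth.json"
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(config, indent=2))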
diff --git a/AppImage/components/hardware.tsx b/AppImage/components/hardware.tsx index 53e0b2a..15db5a0 100644 --- a/AppImage/components/hardware.tsx +++ b/AppImage/components/hardware.tsx @@ -163,14 +163,49 @@ const groupAndSortTemperatures = (temperatures: any[]) => { } export default function Hardware() { + // Static data - load once without refresh const { - data: hardwareData, - error, - isLoading, + data: staticHardwareData, + error: staticError, + isLoading: staticLoading, } = useSWR("/api/hardware", fetcher, { - refreshInterval: 5000, + revalidateOnFocus: false, + revalidateOnReconnect: false, + refreshInterval: 0, // No auto-refresh for static data }) + // Dynamic data - refresh every 7 seconds for temperatures, fans, power, ups + const { + data: dynamicHardwareData, + error: dynamicError, + isLoading: dynamicLoading, + } = useSWR("/api/hardware", fetcher, { + refreshInterval: 7000, + }) + + // Merge static and dynamic data, preferring static for CPU/memory/PCI/disks + const hardwareData = staticHardwareData + ? { + ...dynamicHardwareData, + // Keep static data from initial load + cpu: staticHardwareData.cpu, + motherboard: staticHardwareData.motherboard, + memory_modules: staticHardwareData.memory_modules, + pci_devices: staticHardwareData.pci_devices, + storage_devices: staticHardwareData.storage_devices, + gpus: staticHardwareData.gpus, + // Use dynamic data for these + temperatures: dynamicHardwareData?.temperatures, + fans: dynamicHardwareData?.fans, + power_meter: dynamicHardwareData?.power_meter, + power_supplies: dynamicHardwareData?.power_supplies, + ups: dynamicHardwareData?.ups, + } + : undefined + + const error = staticError || dynamicError + const isLoading = staticLoading + useEffect(() => { if (hardwareData?.storage_devices) { console.log("[v0] Storage devices data from backend:", hardwareData.storage_devices) diff --git a/AppImage/components/login.tsx b/AppImage/components/login.tsx index 38f3aac..0c6c4f9 100644 --- a/AppImage/components/login.tsx +++ b/AppImage/components/login.tsx @@ -2,11 +2,12 @@ import type React from "react" -import { useState } from "react" +import { useState, useEffect } from "react" import { Button } from "./ui/button" import { Input } from "./ui/input" import { Label } from "./ui/label" -import { Lock, User, AlertCircle, Server } from "lucide-react" +import { Checkbox } from "./ui/checkbox" +import { Lock, User, AlertCircle, Server, Shield } from "lucide-react" import { getApiUrl } from "../lib/api-config" import Image from "next/image" @@ -17,9 +18,23 @@ interface LoginProps { export function Login({ onLogin }: LoginProps) { const [username, setUsername] = useState("") const [password, setPassword] = useState("") + const [totpCode, setTotpCode] = useState("") + const [requiresTotp, setRequiresTotp] = useState(false) + const [rememberMe, setRememberMe] = useState(false) const [error, setError] = useState("") const [loading, setLoading] = useState(false) + useEffect(() => { + const savedUsername = localStorage.getItem("proxmenux-saved-username") + const savedPassword = localStorage.getItem("proxmenux-saved-password") + + if (savedUsername && savedPassword) { + setUsername(savedUsername) + setPassword(savedPassword) + setRememberMe(true) + } + }, []) + const handleLogin = async (e: React.FormEvent) => { e.preventDefault() setError("") @@ -29,23 +44,46 @@ export function Login({ onLogin }: LoginProps) { return } + if (requiresTotp && !totpCode) { + setError("Please enter your 2FA code") + return + } + setLoading(true) try { const response = 
await fetch(getApiUrl("/api/auth/login"), { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ username, password }), + body: JSON.stringify({ + username, + password, + totp_token: totpCode || undefined, // Include 2FA code if provided + }), }) const data = await response.json() - if (!response.ok) { - throw new Error(data.error || "Login failed") + if (data.requires_totp) { + setRequiresTotp(true) + setLoading(false) + return + } + + if (!response.ok) { + throw new Error(data.message || "Login failed") } - // Save token localStorage.setItem("proxmenux-auth-token", data.token) + + if (rememberMe) { + localStorage.setItem("proxmenux-saved-username", username) + localStorage.setItem("proxmenux-saved-password", password) + } else { + localStorage.removeItem("proxmenux-saved-username") + localStorage.removeItem("proxmenux-saved-password") + } + onLogin() } catch (err) { setError(err instanceof Error ? err.message : "Login failed") @@ -94,42 +132,107 @@ export function Login({ onLogin }: LoginProps) {
)} -
- -
- - setUsername(e.target.value)} - className="pl-10" - disabled={loading} - autoComplete="username" - /> -
-
+ {!requiresTotp ? ( + <> +
+ +
+ + setUsername(e.target.value)} + className="pl-10 text-base" + disabled={loading} + autoComplete="username" + /> +
+
-
- -
- - setPassword(e.target.value)} - className="pl-10" - disabled={loading} - autoComplete="current-password" - /> +
+ +
+ + setPassword(e.target.value)} + className="pl-10 text-base" + disabled={loading} + autoComplete="current-password" + /> +
+
+ +
+ setRememberMe(checked as boolean)} + disabled={loading} + /> + +
+ + ) : ( +
+
+ +
+

Two-Factor Authentication

+

Enter the 6-digit code from your authentication app

+
+
+ +
+ + setTotpCode(e.target.value.replace(/\D/g, "").slice(0, 6))} + className="text-center text-lg tracking-widest font-mono text-base" + maxLength={6} + disabled={loading} + autoComplete="one-time-code" + autoFocus + /> +

+ You can also use a backup code (format: XXXX-XXXX) +

+
+ +
-
+ )}
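Read together with flask_auth_routes.py further down, the login flow this component implements is a two-step handshake: the first POST sends only username/password, and when 2FA is enabled the server replies 200 with requires_totp: true instead of a token; the client then repeats the request with totp_token filled in (either a 6-digit TOTP code or an XXXX-XXXX backup code). A minimal client-side sketch of that handshake in Python — the base URL and credentials are placeholders, not project defaults:

import requests

BASE = "http://localhost:8008"  # assumed monitor address, adjust to your deployment

def login(username: str, password: str) -> str:
    """Return a bearer token, asking for a 2FA code only when the server demands it."""
    payload = {"username": username, "password": password}
    data = requests.post(f"{BASE}/api/auth/login", json=payload).json()

    if data.get("requires_totp"):  # server signals 2FA with HTTP 200 and no token yet
        payload["totp_token"] = input("2FA code (or XXXX-XXXX backup code): ")
        data = requests.post(f"{BASE}/api/auth/login", json=payload).json()

    if not data.get("success"):
        raise RuntimeError(data.get("message", "Login failed"))
    return data["token"]

token = login("admin", "secret")  # placeholder credentials
status = requests.get(f"{BASE}/api/auth/status",
                      headers={"Authorization": f"Bearer {token}"})
print(status.json())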
diff --git a/AppImage/components/network-metrics.tsx b/AppImage/components/network-metrics.tsx index 0d9386a..6b6de14 100644 --- a/AppImage/components/network-metrics.tsx +++ b/AppImage/components/network-metrics.tsx @@ -3,7 +3,7 @@ import { useState } from "react" import { Card, CardContent, CardHeader, CardTitle } from "./ui/card" import { Badge } from "./ui/badge" -import { Dialog, DialogContent, DialogHeader, DialogTitle } from "./ui/dialog" +import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogDescription } from "./ui/dialog" import { Wifi, Activity, Network, Router, AlertCircle, Zap } from "lucide-react" import useSWR from "swr" import { NetworkTrafficChart } from "./network-traffic-chart" @@ -149,7 +149,7 @@ export function NetworkMetrics() { error, isLoading, } = useSWR("/api/network", fetcher, { - refreshInterval: 60000, // Refresh every 60 seconds + refreshInterval: 53000, revalidateOnFocus: false, revalidateOnReconnect: true, }) @@ -161,13 +161,13 @@ export function NetworkMetrics() { const [interfaceTotals, setInterfaceTotals] = useState<{ received: number; sent: number }>({ received: 0, sent: 0 }) const { data: modalNetworkData } = useSWR(selectedInterface ? "/api/network" : null, fetcher, { - refreshInterval: 15000, // Refresh every 15 seconds when modal is open + refreshInterval: 17000, revalidateOnFocus: false, revalidateOnReconnect: true, }) const { data: interfaceHistoricalData } = useSWR(`/api/node/metrics?timeframe=${timeframe}`, fetcher, { - refreshInterval: 30000, + refreshInterval: 29000, revalidateOnFocus: false, }) @@ -688,6 +688,9 @@ export function NetworkMetrics() { {selectedInterface?.name} - Interface Details + + View detailed information and network traffic statistics for this interface + {selectedInterface?.status.toLowerCase() === "up" && selectedInterface?.vm_type !== "vm" && (
diff --git a/AppImage/components/settings.tsx b/AppImage/components/settings.tsx
setDisable2FAPassword(e.target.value)} + className="pl-10" + disabled={loading} + /> +
+
+ +
+ + +
+
+ )} +
+ )} + @@ -429,6 +558,15 @@ export function Settings() { + + setShow2FASetup(false)} + onSuccess={() => { + setSuccess("2FA enabled successfully!") + checkAuthStatus() + }} + /> ) } diff --git a/AppImage/components/system-logs.tsx b/AppImage/components/system-logs.tsx index 853a9eb..d23b6f5 100644 --- a/AppImage/components/system-logs.tsx +++ b/AppImage/components/system-logs.tsx @@ -27,7 +27,7 @@ import { Menu, Terminal, } from "lucide-react" -import { useState, useEffect } from "react" +import { useState, useEffect, useMemo } from "react" interface Log { timestamp: string @@ -428,39 +428,61 @@ export function SystemLogs() { } } - const logsOnly: CombinedLogEntry[] = logs - .map((log) => ({ ...log, isEvent: false, sortTimestamp: new Date(log.timestamp).getTime() })) - .sort((a, b) => b.sortTimestamp - a.sortTimestamp) + const safeToLowerCase = (value: any): string => { + if (value === null || value === undefined) return "" + return String(value).toLowerCase() + } - const eventsOnly: CombinedLogEntry[] = events - .map((event) => ({ - timestamp: event.starttime, - level: event.level, - service: event.type, - message: `${event.type}${event.vmid ? ` (VM/CT ${event.vmid})` : ""} - ${event.status}`, - source: `Node: ${event.node} • User: ${event.user}`, - isEvent: true, - eventData: event, - sortTimestamp: new Date(event.starttime).getTime(), - })) - .sort((a, b) => b.sortTimestamp - a.sortTimestamp) + const memoizedLogs = useMemo(() => logs, [logs]) + const memoizedEvents = useMemo(() => events, [events]) + const memoizedBackups = useMemo(() => backups, [backups]) + const memoizedNotifications = useMemo(() => notifications, [notifications]) + + const logsOnly: CombinedLogEntry[] = useMemo( + () => + memoizedLogs + .map((log) => ({ ...log, isEvent: false, sortTimestamp: new Date(log.timestamp).getTime() })) + .sort((a, b) => b.sortTimestamp - a.sortTimestamp), + [memoizedLogs], + ) + + const eventsOnly: CombinedLogEntry[] = useMemo( + () => + memoizedEvents + .map((event) => ({ + timestamp: event.starttime, + level: event.level, + service: event.type, + message: `${event.type}${event.vmid ? 
` (VM/CT ${event.vmid})` : ""} - ${event.status}`, + source: `Node: ${event.node} • User: ${event.user}`, + isEvent: true, + eventData: event, + sortTimestamp: new Date(event.starttime).getTime(), + })) + .sort((a, b) => b.sortTimestamp - a.sortTimestamp), + [memoizedEvents], + ) - // Filter logs only const filteredLogsOnly = logsOnly.filter((log) => { + const message = log.message || "" + const service = log.service || "" + const searchTermLower = safeToLowerCase(searchTerm) + const matchesSearch = - log.message.toLowerCase().includes(searchTerm.toLowerCase()) || - log.service.toLowerCase().includes(searchTerm.toLowerCase()) + safeToLowerCase(message).includes(searchTermLower) || safeToLowerCase(service).includes(searchTermLower) const matchesLevel = levelFilter === "all" || log.level === levelFilter const matchesService = serviceFilter === "all" || log.service === serviceFilter return matchesSearch && matchesLevel && matchesService }) - // Filter events only const filteredEventsOnly = eventsOnly.filter((event) => { + const message = event.message || "" + const service = event.service || "" + const searchTermLower = safeToLowerCase(searchTerm) + const matchesSearch = - event.message.toLowerCase().includes(searchTerm.toLowerCase()) || - event.service.toLowerCase().includes(searchTerm.toLowerCase()) + safeToLowerCase(message).includes(searchTermLower) || safeToLowerCase(service).includes(searchTermLower) const matchesLevel = levelFilter === "all" || event.level === levelFilter const matchesService = serviceFilter === "all" || event.service === serviceFilter @@ -470,30 +492,40 @@ export function SystemLogs() { const displayedLogsOnly = filteredLogsOnly.slice(0, displayedLogsCount) const displayedEventsOnly = filteredEventsOnly.slice(0, displayedLogsCount) - const combinedLogs: CombinedLogEntry[] = [ - ...logs.map((log) => ({ ...log, isEvent: false, sortTimestamp: new Date(log.timestamp).getTime() })), - ...events.map((event) => ({ - timestamp: event.starttime, - level: event.level, - service: event.type, - message: `${event.type}${event.vmid ? ` (VM/CT ${event.vmid})` : ""} - ${event.status}`, - source: `Node: ${event.node} • User: ${event.user}`, - isEvent: true, - eventData: event, - sortTimestamp: new Date(event.starttime).getTime(), - })), - ].sort((a, b) => b.sortTimestamp - a.sortTimestamp) // Sort by timestamp descending + const combinedLogs: CombinedLogEntry[] = useMemo( + () => + [ + ...memoizedLogs.map((log) => ({ ...log, isEvent: false, sortTimestamp: new Date(log.timestamp).getTime() })), + ...memoizedEvents.map((event) => ({ + timestamp: event.starttime, + level: event.level, + service: event.type, + message: `${event.type}${event.vmid ? 
` (VM/CT ${event.vmid})` : ""} - ${event.status}`, + source: `Node: ${event.node} • User: ${event.user}`, + isEvent: true, + eventData: event, + sortTimestamp: new Date(event.starttime).getTime(), + })), + ].sort((a, b) => b.sortTimestamp - a.sortTimestamp), + [memoizedLogs, memoizedEvents], + ) - // Filter combined logs - const filteredCombinedLogs = combinedLogs.filter((log) => { - const matchesSearch = - log.message.toLowerCase().includes(searchTerm.toLowerCase()) || - log.service.toLowerCase().includes(searchTerm.toLowerCase()) - const matchesLevel = levelFilter === "all" || log.level === levelFilter - const matchesService = serviceFilter === "all" || log.service === serviceFilter + const filteredCombinedLogs = useMemo( + () => + combinedLogs.filter((log) => { + const message = log.message || "" + const service = log.service || "" + const searchTermLower = safeToLowerCase(searchTerm) - return matchesSearch && matchesLevel && matchesService - }) + const matchesSearch = + safeToLowerCase(message).includes(searchTermLower) || safeToLowerCase(service).includes(searchTermLower) + const matchesLevel = levelFilter === "all" || log.level === levelFilter + const matchesService = serviceFilter === "all" || log.service === serviceFilter + + return matchesSearch && matchesLevel && matchesService + }), + [combinedLogs, searchTerm, levelFilter, serviceFilter], + ) // CHANGE: Re-assigning displayedLogs to use the filteredCombinedLogs const displayedLogs = filteredCombinedLogs.slice(0, displayedLogsCount) @@ -555,7 +587,9 @@ export function SystemLogs() { } const getNotificationTypeColor = (type: string) => { - switch (type.toLowerCase()) { + if (!type) return "bg-gray-500/10 text-gray-500 border-gray-500/20" + + switch (safeToLowerCase(type)) { case "error": return "bg-red-500/10 text-red-500 border-red-500/20" case "warning": @@ -571,7 +605,9 @@ export function SystemLogs() { // ADDED: New function for notification source colors const getNotificationSourceColor = (source: string) => { - switch (source.toLowerCase()) { + if (!source) return "bg-gray-500/10 text-gray-500 border-gray-500/20" + + switch (safeToLowerCase(source)) { case "task-log": return "bg-purple-500/10 text-purple-500 border-purple-500/20" case "journal": @@ -590,7 +626,7 @@ export function SystemLogs() { info: logs.filter((log) => ["info", "notice", "debug"].includes(log.level)).length, } - const uniqueServices = [...new Set(logs.map((log) => log.service))] + const uniqueServices = useMemo(() => [...new Set(memoizedLogs.map((log) => log.service))], [memoizedLogs]) const getBackupType = (volid: string): "vm" | "lxc" => { if (volid.includes("/vm/") || volid.includes("vzdump-qemu")) { @@ -915,9 +951,11 @@ export function SystemLogs() { - All Services - {uniqueServices.slice(0, 20).map((service) => ( - + + All Services + + {uniqueServices.slice(0, 20).map((service, idx) => ( + {service} ))} @@ -932,51 +970,59 @@ export function SystemLogs() {
- {displayedLogs.map((log, index) => ( -
{ - if (log.eventData) { - setSelectedEvent(log.eventData) - setIsEventModalOpen(true) - } else { - setSelectedLog(log as SystemLog) - setIsLogModalOpen(true) - } - }} - > -
- - {getLevelIcon(log.level)} - {log.level.toUpperCase()} - - {log.eventData && ( - - - EVENT - - )} -
+ {displayedLogs.map((log, index) => { + // Generate a more stable unique key + const timestampMs = new Date(log.timestamp).getTime() + const uniqueKey = log.eventData + ? `event-${log.eventData.upid.replace(/:/g, "-")}-${timestampMs}` + : `log-${timestampMs}-${log.service?.substring(0, 10) || "unknown"}-${log.pid || "nopid"}-${index}` -
-
-
{log.service}
-
- {log.timestamp} + return ( +
{ + if (log.eventData) { + setSelectedEvent(log.eventData) + setIsEventModalOpen(true) + } else { + setSelectedLog(log as SystemLog) + setIsLogModalOpen(true) + } + }} + > +
+ + {getLevelIcon(log.level)} + {log.level.toUpperCase()} + + {log.eventData && ( + + + EVENT + + )} +
+ +
+
+
{log.service}
+
+ {log.timestamp} +
+
+
+ {log.message} +
+
+ {log.source} + {log.pid && ` • PID: ${log.pid}`} + {log.hostname && ` • Host: ${log.hostname}`}
-
- {log.message} -
-
- {log.source} - {log.pid && ` • PID: ${log.pid}`} - {log.hostname && ` • Host: ${log.hostname}`} -
-
- ))} + ) + })} {displayedLogs.length === 0 && (
@@ -1037,44 +1083,48 @@ export function SystemLogs() {
- {backups.map((backup, index) => ( -
{ - setSelectedBackup(backup) - setIsBackupModalOpen(true) - }} - > -
- -
+ {memoizedBackups.map((backup, index) => { + const uniqueKey = `backup-${backup.volid.replace(/[/:]/g, "-")}-${backup.timestamp || index}` -
-
-
- - {getBackupTypeLabel(backup.volid)} - - - {getBackupStorageLabel(backup.volid)} + return ( +
{ + setSelectedBackup(backup) + setIsBackupModalOpen(true) + }} + > +
+ +
+ +
+
+
+ + {getBackupTypeLabel(backup.volid)} + + + {getBackupStorageLabel(backup.volid)} + +
+ + {backup.size_human}
- - {backup.size_human} - -
-
Storage: {backup.storage}
-
- - {backup.created} +
Storage: {backup.storage}
+
+ + {backup.created} +
-
- ))} + ) + })} {backups.length === 0 && (
@@ -1090,42 +1140,47 @@ export function SystemLogs() {
- {notifications.map((notification, index) => ( -
{ - setSelectedNotification(notification) - setIsNotificationModalOpen(true) - }} - > -
- - {notification.type.toUpperCase()} - - - {notification.source === "task-log" && } - {notification.source === "journal" && } - {notification.source.toUpperCase()} - -
+ {memoizedNotifications.map((notification, index) => { + const timestampMs = new Date(notification.timestamp).getTime() + const uniqueKey = `notification-${timestampMs}-${notification.service?.substring(0, 10) || "unknown"}-${notification.source?.substring(0, 10) || "unknown"}-${index}` -
-
-
{notification.service}
-
- {notification.timestamp} + return ( +
{ + setSelectedNotification(notification) + setIsNotificationModalOpen(true) + }} + > +
+ + {notification.type.toUpperCase()} + + + {notification.source === "task-log" && } + {notification.source === "journal" && } + {notification.source.toUpperCase()} + +
+ +
+
+
{notification.service}
+
+ {notification.timestamp} +
+
+
+ {notification.message} +
+
+ Service: {notification.service} • Source: {notification.source}
-
- {notification.message} -
-
- Service: {notification.service} • Source: {notification.source} -
-
- ))} + ) + })} {notifications.length === 0 && (
diff --git a/AppImage/components/system-overview.tsx b/AppImage/components/system-overview.tsx index 20204f4..9c41550 100644 --- a/AppImage/components/system-overview.tsx +++ b/AppImage/components/system-overview.tsx @@ -259,7 +259,7 @@ export function SystemOverview() { fetchSystemData().then((data) => { if (data) setSystemData(data) }) - }, 10000) + }, 9000) // Changed from 10000 to 9000ms return () => { clearInterval(systemInterval) @@ -273,7 +273,7 @@ export function SystemOverview() { } fetchVMs() - const vmInterval = setInterval(fetchVMs, 60000) + const vmInterval = setInterval(fetchVMs, 59000) // Changed from 60000 to 59000ms return () => { clearInterval(vmInterval) @@ -290,7 +290,7 @@ export function SystemOverview() { } fetchStorage() - const storageInterval = setInterval(fetchStorage, 60000) + const storageInterval = setInterval(fetchStorage, 59000) // Changed from 60000 to 59000ms return () => { clearInterval(storageInterval) @@ -304,7 +304,7 @@ export function SystemOverview() { } fetchNetwork() - const networkInterval = setInterval(fetchNetwork, 60000) + const networkInterval = setInterval(fetchNetwork, 59000) // Changed from 60000 to 59000ms return () => { clearInterval(networkInterval) diff --git a/AppImage/components/two-factor-setup.tsx b/AppImage/components/two-factor-setup.tsx new file mode 100644 index 0000000..9a4a9cb --- /dev/null +++ b/AppImage/components/two-factor-setup.tsx @@ -0,0 +1,261 @@ +"use client" + +import { useState } from "react" +import { Button } from "./ui/button" +import { Input } from "./ui/input" +import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle } from "./ui/dialog" +import { AlertCircle, CheckCircle, Copy, Shield, Check } from "lucide-react" +import { getApiUrl } from "../lib/api-config" + +interface TwoFactorSetupProps { + open: boolean + onClose: () => void + onSuccess: () => void +} + +export function TwoFactorSetup({ open, onClose, onSuccess }: TwoFactorSetupProps) { + const [step, setStep] = useState(1) + const [qrCode, setQrCode] = useState("") + const [secret, setSecret] = useState("") + const [backupCodes, setBackupCodes] = useState([]) + const [verificationCode, setVerificationCode] = useState("") + const [error, setError] = useState("") + const [loading, setLoading] = useState(false) + const [copiedSecret, setCopiedSecret] = useState(false) + const [copiedCodes, setCopiedCodes] = useState(false) + + const handleSetupStart = async () => { + setError("") + setLoading(true) + + try { + const token = localStorage.getItem("proxmenux-auth-token") + const response = await fetch(getApiUrl("/api/auth/totp/setup"), { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${token}`, + }, + }) + + const data = await response.json() + + if (!response.ok) { + throw new Error(data.message || "Failed to setup 2FA") + } + + setQrCode(data.qr_code) + setSecret(data.secret) + setBackupCodes(data.backup_codes) + setStep(2) + } catch (err) { + setError(err instanceof Error ? 
err.message : "Failed to setup 2FA") + } finally { + setLoading(false) + } + } + + const handleVerify = async () => { + if (!verificationCode || verificationCode.length !== 6) { + setError("Please enter a 6-digit code") + return + } + + setError("") + setLoading(true) + + try { + const token = localStorage.getItem("proxmenux-auth-token") + const response = await fetch(getApiUrl("/api/auth/totp/enable"), { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${token}`, + }, + body: JSON.stringify({ token: verificationCode }), + }) + + const data = await response.json() + + if (!response.ok) { + throw new Error(data.message || "Invalid verification code") + } + + setStep(3) + } catch (err) { + setError(err instanceof Error ? err.message : "Verification failed") + } finally { + setLoading(false) + } + } + + const copyToClipboard = (text: string, type: "secret" | "codes") => { + navigator.clipboard.writeText(text) + if (type === "secret") { + setCopiedSecret(true) + setTimeout(() => setCopiedSecret(false), 2000) + } else { + setCopiedCodes(true) + setTimeout(() => setCopiedCodes(false), 2000) + } + } + + const handleClose = () => { + setStep(1) + setQrCode("") + setSecret("") + setBackupCodes([]) + setVerificationCode("") + setError("") + onClose() + } + + const handleFinish = () => { + handleClose() + onSuccess() + } + + return ( + + + + + + Setup Two-Factor Authentication + + Add an extra layer of security to your account + + + {error && ( +
+ +

{error}

+
+ )} + + {step === 1 && ( +
+
+

+ Two-factor authentication (2FA) adds an extra layer of security by requiring a code from your + authentication app in addition to your password. +

+
+ +
+

You will need:

+
    +
• An authentication app (Google Authenticator, Authy, etc.)
• Scan a QR code or enter a key manually
• Store backup codes securely
+
+ + +
+ )} + + {step === 2 && ( +
+
+

1. Scan the QR code

+

Open your authentication app and scan this QR code

+ {qrCode && ( +
+ QR Code +
+ )} +
+ +
+

Or enter the key manually:

+
+ + +
+
+ +
+

2. Enter the verification code

+

Enter the 6-digit code that appears in your app

+ setVerificationCode(e.target.value.replace(/\D/g, "").slice(0, 6))} + className="text-center text-lg tracking-widest font-mono text-base" + maxLength={6} + disabled={loading} + /> +
+ +
+ + +
+
+ )} + + {step === 3 && ( +
+
+ +
+

2FA Enabled Successfully

+

+ Your account is now protected with two-factor authentication +

+
+
+ +
+

Important: Save your backup codes

+

+ These codes will allow you to access your account if you lose access to your authentication app. Store + them in a safe place. +

+ +
+
+ Backup Codes + +
+
+ {backupCodes.map((code, index) => ( +
+ {code} +
+ ))} +
+
+
+ + +
+ )} +
+
+ ) +} diff --git a/AppImage/components/ui/dialog.tsx b/AppImage/components/ui/dialog.tsx index e6dcf4a..5c95c56 100644 --- a/AppImage/components/ui/dialog.tsx +++ b/AppImage/components/ui/dialog.tsx @@ -41,6 +41,7 @@ const DialogContent = React.forwardRef< "fixed left-[50%] top-[50%] z-50 grid w-full max-w-lg translate-x-[-50%] translate-y-[-50%] gap-4 border bg-background p-6 shadow-lg duration-200 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[state=closed]:slide-out-to-left-1/2 data-[state=closed]:slide-out-to-top-[48%] data-[state=open]:slide-in-from-left-1/2 data-[state=open]:slide-in-from-top-[48%] rounded-lg", className, )} + aria-describedby={props["aria-describedby"] || undefined} {...props} > {children} diff --git a/AppImage/components/virtual-machines.tsx b/AppImage/components/virtual-machines.tsx index fd5cc4b..dd0ffcb 100644 --- a/AppImage/components/virtual-machines.tsx +++ b/AppImage/components/virtual-machines.tsx @@ -7,7 +7,7 @@ import { Card, CardContent, CardHeader, CardTitle } from "./ui/card" import { Badge } from "./ui/badge" import { Progress } from "./ui/progress" import { Button } from "./ui/button" -import { Dialog, DialogContent, DialogHeader, DialogTitle } from "./ui/dialog" +import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogDescription } from "./ui/dialog" import { Server, Play, @@ -264,7 +264,7 @@ export function VirtualMachines() { isLoading, mutate, } = useSWR("/api/vms", fetcher, { - refreshInterval: 30000, + refreshInterval: 23000, revalidateOnFocus: false, revalidateOnReconnect: true, }) @@ -451,7 +451,7 @@ export function VirtualMachines() { "/api/system", fetcher, { - refreshInterval: 30000, + refreshInterval: 23000, revalidateOnFocus: false, }, ) @@ -1042,7 +1042,10 @@ export function VirtualMachines() { setEditedNotes("") }} > - + {currentView === "main" ? ( <> @@ -1096,13 +1099,16 @@ export function VirtualMachines() { )}
+ + View and manage configuration, resources, and status for this virtual machine +
{selectedVM && ( <> -
+
Loading configuration...
) : vmDetails?.config ? ( <> - +

@@ -1259,26 +1265,25 @@ export function VirtualMachines() { )}

+ {/* IP Addresses with proper keys */} {selectedVM?.type === "lxc" && vmDetails?.lxc_ip_info && (

IP Addresses

- {/* Real IPs (green, without "Real" label) */} {vmDetails.lxc_ip_info.real_ips.map((ip, index) => ( {ip} ))} - {/* Docker bridge IPs (yellow, with "Bridge" label) */} {vmDetails.lxc_ip_info.docker_ips.map((ip, index) => ( @@ -1388,7 +1393,7 @@ export function VirtualMachines() {
)} - {/* GPU Passthrough */} + {/* GPU Passthrough with proper keys */} {vmDetails.hardware_info.gpu_passthrough && vmDetails.hardware_info.gpu_passthrough.length > 0 && (
@@ -1396,7 +1401,7 @@ export function VirtualMachines() {
{vmDetails.hardware_info.gpu_passthrough.map((gpu, index) => ( )} - {/* Other Hardware Devices */} + {/* Hardware Devices with proper keys */} {vmDetails.hardware_info.devices && vmDetails.hardware_info.devices.length > 0 && (
@@ -1419,7 +1424,7 @@ export function VirtualMachines() {
{vmDetails.hardware_info.devices.map((device, index) => ( @@ -1541,7 +1546,7 @@ export function VirtualMachines() {
{vmDetails.config.rootfs && ( -
+
Root Filesystem
{vmDetails.config.rootfs} @@ -1549,15 +1554,16 @@ export function VirtualMachines() {
)} {vmDetails.config.scsihw && ( -
+
SCSI Controller
{vmDetails.config.scsihw}
)} + {/* Disk Storage with proper keys */} {Object.keys(vmDetails.config) .filter((key) => key.match(/^(scsi|sata|ide|virtio)\d+$/)) .map((diskKey) => ( -
+
{diskKey.toUpperCase().replace(/(\d+)/, " $1")}
@@ -1567,7 +1573,7 @@ export function VirtualMachines() {
))} {vmDetails.config.efidisk0 && ( -
+
EFI Disk
{vmDetails.config.efidisk0} @@ -1575,18 +1581,18 @@ export function VirtualMachines() {
)} {vmDetails.config.tpmstate0 && ( -
+
TPM State
{vmDetails.config.tpmstate0}
)} - {/* Mount points for LXC */} + {/* Mount Points with proper keys */} {Object.keys(vmDetails.config) .filter((key) => key.match(/^mp\d+$/)) .map((mpKey) => ( -
+
Mount Point {mpKey.replace("mp", "")}
@@ -1604,10 +1610,11 @@ export function VirtualMachines() { Network
+ {/* Network Interfaces with proper keys */} {Object.keys(vmDetails.config) .filter((key) => key.match(/^net\d+$/)) .map((netKey) => ( -
+
Network Interface {netKey.replace("net", "")}
@@ -1645,7 +1652,7 @@ export function VirtualMachines() {
- {/* PCI Devices Section */} + {/* PCI Devices with proper keys */} {Object.keys(vmDetails.config).some((key) => key.match(/^hostpci\d+$/)) && (

@@ -1655,7 +1662,7 @@ export function VirtualMachines() { {Object.keys(vmDetails.config) .filter((key) => key.match(/^hostpci\d+$/)) .map((pciKey) => ( -
+
{pciKey.toUpperCase().replace(/(\d+)/, " $1")}
@@ -1668,7 +1675,7 @@ export function VirtualMachines() {
)} - {/* USB Devices Section */} + {/* USB Devices with proper keys */} {Object.keys(vmDetails.config).some((key) => key.match(/^usb\d+$/)) && (

@@ -1678,7 +1685,7 @@ export function VirtualMachines() { {Object.keys(vmDetails.config) .filter((key) => key.match(/^usb\d+$/)) .map((usbKey) => ( -
+
{usbKey.toUpperCase().replace(/(\d+)/, " $1")}
@@ -1691,7 +1698,7 @@ export function VirtualMachines() {
)} - {/* Serial Devices Section */} + {/* Serial Ports with proper keys */} {Object.keys(vmDetails.config).some((key) => key.match(/^serial\d+$/)) && (

@@ -1701,7 +1708,7 @@ export function VirtualMachines() { {Object.keys(vmDetails.config) .filter((key) => key.match(/^serial\d+$/)) .map((serialKey) => ( -
+
{serialKey.toUpperCase().replace(/(\d+)/, " $1")}
@@ -1713,91 +1720,6 @@ export function VirtualMachines() {
)} - - {/* Options Section */} -
-

- Options -

-
- {vmDetails.config.onboot !== undefined && ( -
-
Start on Boot
- - {vmDetails.config.onboot ? "Yes" : "No"} - -
- )} - {vmDetails.config.ostype && ( -
-
OS Type
-
{vmDetails.config.ostype}
-
- )} - {vmDetails.config.arch && ( -
-
Architecture
-
{vmDetails.config.arch}
-
- )} - {vmDetails.config.boot && ( -
-
Boot Order
-
{vmDetails.config.boot}
-
- )} - {vmDetails.config.features && ( -
-
Features
-
- {vmDetails.config.features} -
-
- )} -
-
- - {/* Advanced Section */} - {(vmDetails.config.vmgenid || vmDetails.config.smbios1 || vmDetails.config.meta) && ( -
-

- Advanced -

-
- {vmDetails.config.vmgenid && ( -
-
VM Generation ID
-
- {vmDetails.config.vmgenid} -
-
- )} - {vmDetails.config.smbios1 && ( -
-
SMBIOS
-
- {vmDetails.config.smbios1} -
-
- )} - {vmDetails.config.meta && ( -
-
Metadata
-
- {vmDetails.config.meta} -
-
- )} -
-
- )}

)} diff --git a/AppImage/lib/polling-config.tsx b/AppImage/lib/polling-config.tsx new file mode 100644 index 0000000..b0becb3 --- /dev/null +++ b/AppImage/lib/polling-config.tsx @@ -0,0 +1,85 @@ +"use client" + +import { createContext, useContext, useState, useEffect, type ReactNode } from "react" + +export interface PollingIntervals { + storage: number + network: number + vms: number + hardware: number +} + +// Default intervals in milliseconds +const DEFAULT_INTERVALS: PollingIntervals = { + storage: 60000, // 60 seconds + network: 60000, // 60 seconds + vms: 30000, // 30 seconds + hardware: 60000, // 60 seconds +} + +const STORAGE_KEY = "proxmenux_polling_intervals" + +interface PollingConfigContextType { + intervals: PollingIntervals + updateInterval: (key: keyof PollingIntervals, value: number) => void +} + +const PollingConfigContext = createContext(undefined) + +export function PollingConfigProvider({ children }: { children: ReactNode }) { + const [intervals, setIntervals] = useState(DEFAULT_INTERVALS) + + // Load from localStorage on mount + useEffect(() => { + if (typeof window === "undefined") return + + const stored = localStorage.getItem(STORAGE_KEY) + if (stored) { + try { + const parsed = JSON.parse(stored) + setIntervals({ ...DEFAULT_INTERVALS, ...parsed }) + } catch (e) { + console.error("[v0] Failed to parse stored polling intervals:", e) + } + } + }, []) + + const updateInterval = (key: keyof PollingIntervals, value: number) => { + setIntervals((prev) => { + const newIntervals = { ...prev, [key]: value } + if (typeof window !== "undefined") { + localStorage.setItem(STORAGE_KEY, JSON.stringify(newIntervals)) + } + return newIntervals + }) + } + + return {children} +} + +export function usePollingConfig() { + const context = useContext(PollingConfigContext) + if (!context) { + // During SSR or when provider is not available, return defaults + if (typeof window === "undefined") { + return { + intervals: DEFAULT_INTERVALS, + updateInterval: () => {}, + } + } + throw new Error("usePollingConfig must be used within PollingConfigProvider") + } + return context +} + +// Interval options for the UI (in milliseconds) +export const INTERVAL_OPTIONS = [ + { label: "10 seconds", value: 10000 }, + { label: "30 seconds", value: 30000 }, + { label: "1 minute", value: 60000 }, + { label: "2 minutes", value: 120000 }, + { label: "5 minutes", value: 300000 }, + { label: "10 minutes", value: 600000 }, + { label: "30 minutes", value: 1800000 }, + { label: "1 hour", value: 3600000 }, +] diff --git a/AppImage/scripts/auth_manager.py b/AppImage/scripts/auth_manager.py index 1fbda2b..7c45dee 100644 --- a/AppImage/scripts/auth_manager.py +++ b/AppImage/scripts/auth_manager.py @@ -5,11 +5,13 @@ Handles all authentication-related operations including: - Password hashing and verification - JWT token generation and validation - Auth status checking +- Two-Factor Authentication (2FA/TOTP) """ import os import json import hashlib +import secrets from datetime import datetime, timedelta from pathlib import Path @@ -20,6 +22,16 @@ except ImportError: JWT_AVAILABLE = False print("Warning: PyJWT not available. Authentication features will be limited.") +try: + import pyotp + import segno + import io + import base64 + TOTP_AVAILABLE = True +except ImportError: + TOTP_AVAILABLE = False + print("Warning: pyotp/segno not available. 
2FA features will be disabled.") + # Configuration CONFIG_DIR = Path.home() / ".config" / "proxmenux-monitor" AUTH_CONFIG_FILE = CONFIG_DIR / "auth.json" @@ -41,8 +53,11 @@ def load_auth_config(): "enabled": bool, "username": str, "password_hash": str, - "declined": bool, # True if user explicitly declined auth - "configured": bool # True if auth has been set up (enabled or declined) + "declined": bool, + "configured": bool, + "totp_enabled": bool, # 2FA enabled flag + "totp_secret": str, # TOTP secret key + "backup_codes": list # List of backup codes } """ if not AUTH_CONFIG_FILE.exists(): @@ -51,7 +66,10 @@ def load_auth_config(): "username": None, "password_hash": None, "declined": False, - "configured": False + "configured": False, + "totp_enabled": False, + "totp_secret": None, + "backup_codes": [] } try: @@ -60,6 +78,9 @@ def load_auth_config(): # Ensure all required fields exist config.setdefault("declined", False) config.setdefault("configured", config.get("enabled", False) or config.get("declined", False)) + config.setdefault("totp_enabled", False) + config.setdefault("totp_secret", None) + config.setdefault("backup_codes", []) return config except Exception as e: print(f"Error loading auth config: {e}") @@ -68,7 +89,10 @@ def load_auth_config(): "username": None, "password_hash": None, "declined": False, - "configured": False + "configured": False, + "totp_enabled": False, + "totp_secret": None, + "backup_codes": [] } @@ -141,16 +165,18 @@ def get_auth_status(): "auth_configured": bool, "declined": bool, "username": str or None, - "authenticated": bool + "authenticated": bool, + "totp_enabled": bool # 2FA status } """ config = load_auth_config() return { "auth_enabled": config.get("enabled", False), - "auth_configured": config.get("configured", False), # Frontend expects this field name + "auth_configured": config.get("configured", False), "declined": config.get("declined", False), "username": config.get("username") if config.get("enabled") else None, - "authenticated": False # Will be set to True by the route handler if token is valid + "authenticated": False, + "totp_enabled": config.get("totp_enabled", False) # Include 2FA status } @@ -170,7 +196,10 @@ def setup_auth(username, password): "username": username, "password_hash": hash_password(password), "declined": False, - "configured": True + "configured": True, + "totp_enabled": False, + "totp_secret": None, + "backup_codes": [] } if save_auth_config(config): @@ -190,6 +219,9 @@ def decline_auth(): config["configured"] = True config["username"] = None config["password_hash"] = None + config["totp_enabled"] = False + config["totp_secret"] = None + config["backup_codes"] = [] if save_auth_config(config): return True, "Authentication declined" @@ -204,8 +236,13 @@ def disable_auth(): """ config = load_auth_config() config["enabled"] = False - # Keep configured=True and don't set declined=True - # This allows re-enabling without showing the setup modal again + config["username"] = None + config["password_hash"] = None + config["declined"] = False + config["configured"] = False + config["totp_enabled"] = False + config["totp_secret"] = None + config["backup_codes"] = [] if save_auth_config(config): return True, "Authentication disabled" @@ -256,24 +293,212 @@ def change_password(old_password, new_password): return False, "Failed to save new password" -def authenticate(username, password): +def generate_totp_secret(): + """Generate a new TOTP secret key""" + if not TOTP_AVAILABLE: + return None + return pyotp.random_base32() + + 
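# Aside, not part of this patch: a self-contained pyotp round trip showing what
# the helpers below build on, and why verify_totp() passes valid_window=1 — the
# window also accepts the previous and next 30-second step, tolerating small
# clock drift between the server and the authenticator app. Names here are
# illustrative only.
import pyotp

demo_secret = pyotp.random_base32()
demo_totp = pyotp.TOTP(demo_secret)
demo_uri = demo_totp.provisioning_uri(name="admin", issuer_name="ProxMenux Monitor")
demo_code = demo_totp.now()                     # the 6-digit code an app would show
assert demo_totp.verify(demo_code, valid_window=1)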
+def generate_totp_qr(username, secret): """ - Authenticate a user with username and password - Returns (success: bool, token: str or None, message: str) + Generate a QR code for TOTP setup + Returns base64 encoded SVG image + """ + if not TOTP_AVAILABLE: + return None + + try: + # Create TOTP URI + totp = pyotp.TOTP(secret) + uri = totp.provisioning_uri( + name=username, + issuer_name="ProxMenux Monitor" + ) + + qr = segno.make(uri) + + # Convert to SVG string + buffer = io.BytesIO() + qr.save(buffer, kind='svg', scale=4, border=2) + svg_bytes = buffer.getvalue() + svg_content = svg_bytes.decode('utf-8') + + # Return as data URL + svg_base64 = base64.b64encode(svg_content.encode()).decode('utf-8') + return f"data:image/svg+xml;base64,{svg_base64}" + except Exception as e: + print(f"Error generating QR code: {e}") + return None + + +def generate_backup_codes(count=8): + """Generate backup codes for 2FA recovery""" + codes = [] + for _ in range(count): + # Generate 8-character alphanumeric code + code = ''.join(secrets.choice('ABCDEFGHJKLMNPQRSTUVWXYZ23456789') for _ in range(8)) + # Format as XXXX-XXXX for readability + formatted = f"{code[:4]}-{code[4:]}" + codes.append({ + "code": hashlib.sha256(formatted.encode()).hexdigest(), + "used": False + }) + return codes + + +def setup_totp(username): + """ + Set up TOTP for a user + Returns (success: bool, secret: str, qr_code: str, backup_codes: list, message: str) + """ + if not TOTP_AVAILABLE: + return False, None, None, None, "2FA is not available (pyotp/segno not installed)" + + config = load_auth_config() + + if not config.get("enabled"): + return False, None, None, None, "Authentication must be enabled first" + + if config.get("username") != username: + return False, None, None, None, "Invalid username" + + # Generate new secret and backup codes + secret = generate_totp_secret() + qr_code = generate_totp_qr(username, secret) + backup_codes_plain = [] + backup_codes_hashed = generate_backup_codes() + + # Generate plain text backup codes for display (only returned once) + for i in range(8): + code = ''.join(secrets.choice('ABCDEFGHJKLMNPQRSTUVWXYZ23456789') for _ in range(8)) + formatted = f"{code[:4]}-{code[4:]}" + backup_codes_plain.append(formatted) + backup_codes_hashed[i]["code"] = hashlib.sha256(formatted.encode()).hexdigest() + + # Store secret and hashed backup codes (not enabled yet until verified) + config["totp_secret"] = secret + config["backup_codes"] = backup_codes_hashed + + if save_auth_config(config): + return True, secret, qr_code, backup_codes_plain, "2FA setup initiated" + else: + return False, None, None, None, "Failed to save 2FA configuration" + + +def verify_totp(username, token, use_backup=False): + """ + Verify a TOTP token or backup code + Returns (success: bool, message: str) + """ + if not TOTP_AVAILABLE and not use_backup: + return False, "2FA is not available" + + config = load_auth_config() + + if not config.get("totp_enabled"): + return False, "2FA is not enabled" + + if config.get("username") != username: + return False, "Invalid username" + + # Check backup code + if use_backup: + token_hash = hashlib.sha256(token.encode()).hexdigest() + for backup_code in config.get("backup_codes", []): + if backup_code["code"] == token_hash and not backup_code["used"]: + backup_code["used"] = True + save_auth_config(config) + return True, "Backup code accepted" + return False, "Invalid or already used backup code" + + # Check TOTP token + totp = pyotp.TOTP(config.get("totp_secret")) + if totp.verify(token, 
valid_window=1): # Allow 1 time step tolerance + return True, "2FA verification successful" + else: + return False, "Invalid 2FA code" + + +def enable_totp(username, verification_token): + """ + Enable TOTP after successful verification + Returns (success: bool, message: str) + """ + if not TOTP_AVAILABLE: + return False, "2FA is not available" + + config = load_auth_config() + + if not config.get("totp_secret"): + return False, "2FA has not been set up. Please set up 2FA first." + + if config.get("username") != username: + return False, "Invalid username" + + # Verify the token before enabling + totp = pyotp.TOTP(config.get("totp_secret")) + if not totp.verify(verification_token, valid_window=1): + return False, "Invalid verification code. Please try again." + + config["totp_enabled"] = True + + if save_auth_config(config): + return True, "2FA enabled successfully" + else: + return False, "Failed to enable 2FA" + + +def disable_totp(username, password): + """ + Disable TOTP (requires password confirmation) + Returns (success: bool, message: str) + """ + config = load_auth_config() + + if config.get("username") != username: + return False, "Invalid username" + + if not verify_password(password, config.get("password_hash", "")): + return False, "Invalid password" + + config["totp_enabled"] = False + config["totp_secret"] = None + config["backup_codes"] = [] + + if save_auth_config(config): + return True, "2FA disabled successfully" + else: + return False, "Failed to disable 2FA" + + +def authenticate(username, password, totp_token=None): + """ + Authenticate a user with username, password, and optional TOTP + Returns (success: bool, token: str or None, requires_totp: bool, message: str) """ config = load_auth_config() if not config.get("enabled"): - return False, None, "Authentication is not enabled" + return False, None, False, "Authentication is not enabled" if username != config.get("username"): - return False, None, "Invalid username or password" + return False, None, False, "Invalid username or password" if not verify_password(password, config.get("password_hash", "")): - return False, None, "Invalid username or password" + return False, None, False, "Invalid username or password" + + if config.get("totp_enabled"): + if not totp_token: + return False, None, True, "2FA code required" + + # Verify TOTP token or backup code + success, message = verify_totp(username, totp_token, use_backup=len(totp_token) == 9) # Backup codes are formatted XXXX-XXXX + if not success: + return False, None, True, message token = generate_token(username) if token: - return True, token, "Authentication successful" + return True, token, False, "Authentication successful" else: - return False, None, "Failed to generate authentication token" + return False, None, False, "Failed to generate authentication token" diff --git a/AppImage/scripts/build_appimage.sh b/AppImage/scripts/build_appimage.sh index f43b901..54c4898 100644 --- a/AppImage/scripts/build_appimage.sh +++ b/AppImage/scripts/build_appimage.sh @@ -284,6 +284,8 @@ pip3 install --target "$APP_DIR/usr/lib/python3/dist-packages" \ psutil \ requests \ PyJWT \ + pyotp \ + segno \ googletrans==4.0.0-rc1 \ httpx==0.13.3 \ httpcore==0.9.1 \ diff --git a/AppImage/scripts/flask_auth_routes.py b/AppImage/scripts/flask_auth_routes.py index 1b04059..00f4f5f 100644 --- a/AppImage/scripts/flask_auth_routes.py +++ b/AppImage/scripts/flask_auth_routes.py @@ -64,11 +64,14 @@ def auth_login(): data = request.json username = data.get('username') password = 
data.get('password') + totp_token = data.get('totp_token') # Optional 2FA token - success, token, message = auth_manager.authenticate(username, password) + success, token, requires_totp, message = auth_manager.authenticate(username, password, totp_token) if success: return jsonify({"success": True, "token": token, "message": message}) + elif requires_totp: + return jsonify({"success": False, "requires_totp": True, "message": message}), 200 else: return jsonify({"success": False, "message": message}), 401 except Exception as e: @@ -93,6 +96,10 @@ def auth_enable(): def auth_disable(): """Disable authentication""" try: + token = request.headers.get('Authorization', '').replace('Bearer ', '') + if not token or not auth_manager.verify_token(token): + return jsonify({"success": False, "message": "Unauthorized"}), 401 + success, message = auth_manager.disable_auth() if success: @@ -119,3 +126,95 @@ def auth_change_password(): return jsonify({"success": False, "message": message}), 400 except Exception as e: return jsonify({"success": False, "message": str(e)}), 500 + + +@auth_bp.route('/api/auth/skip', methods=['POST']) +def auth_skip(): + """Skip authentication setup (same as decline)""" + try: + success, message = auth_manager.decline_auth() + + if success: + return jsonify({"success": True, "message": message}) + else: + return jsonify({"success": False, "message": message}), 400 + except Exception as e: + return jsonify({"success": False, "message": str(e)}), 500 + + +@auth_bp.route('/api/auth/totp/setup', methods=['POST']) +def totp_setup(): + """Initialize TOTP setup for a user""" + try: + token = request.headers.get('Authorization', '').replace('Bearer ', '') + username = auth_manager.verify_token(token) + + if not username: + return jsonify({"success": False, "message": "Unauthorized"}), 401 + + success, secret, qr_code, backup_codes, message = auth_manager.setup_totp(username) + + if success: + return jsonify({ + "success": True, + "secret": secret, + "qr_code": qr_code, + "backup_codes": backup_codes, + "message": message + }) + else: + return jsonify({"success": False, "message": message}), 400 + except Exception as e: + return jsonify({"success": False, "message": str(e)}), 500 + + +@auth_bp.route('/api/auth/totp/enable', methods=['POST']) +def totp_enable(): + """Enable TOTP after verification""" + try: + token = request.headers.get('Authorization', '').replace('Bearer ', '') + username = auth_manager.verify_token(token) + + if not username: + return jsonify({"success": False, "message": "Unauthorized"}), 401 + + data = request.json + verification_token = data.get('token') + + if not verification_token: + return jsonify({"success": False, "message": "Verification token required"}), 400 + + success, message = auth_manager.enable_totp(username, verification_token) + + if success: + return jsonify({"success": True, "message": message}) + else: + return jsonify({"success": False, "message": message}), 400 + except Exception as e: + return jsonify({"success": False, "message": str(e)}), 500 + + +@auth_bp.route('/api/auth/totp/disable', methods=['POST']) +def totp_disable(): + """Disable TOTP (requires password confirmation)""" + try: + token = request.headers.get('Authorization', '').replace('Bearer ', '') + username = auth_manager.verify_token(token) + + if not username: + return jsonify({"success": False, "message": "Unauthorized"}), 401 + + data = request.json + password = data.get('password') + + if not password: + return jsonify({"success": False, "message": "Password required"}), 
400 + + success, message = auth_manager.disable_totp(username, password) + + if success: + return jsonify({"success": True, "message": message}) + else: + return jsonify({"success": False, "message": message}), 400 + except Exception as e: + return jsonify({"success": False, "message": str(e)}), 500 diff --git a/AppImage/scripts/flask_health_routes.py b/AppImage/scripts/flask_health_routes.py index 766c3e4..86612df 100644 --- a/AppImage/scripts/flask_health_routes.py +++ b/AppImage/scripts/flask_health_routes.py @@ -24,3 +24,16 @@ def get_health_details(): return jsonify(details) except Exception as e: return jsonify({'error': str(e)}), 500 + +@health_bp.route('/api/system-info', methods=['GET']) +def get_system_info(): + """ + Get lightweight system info for header display. + Returns: hostname, uptime, and cached health status. + This is optimized for minimal server impact. + """ + try: + info = health_monitor.get_system_info() + return jsonify(info) + except Exception as e: + return jsonify({'error': str(e)}), 500 diff --git a/AppImage/scripts/flask_server.py b/AppImage/scripts/flask_server.py index 654ed92..a726c5d 100644 --- a/AppImage/scripts/flask_server.py +++ b/AppImage/scripts/flask_server.py @@ -950,31 +950,37 @@ def get_pcie_link_speed(disk_name): import re match = re.match(r'(nvme\d+)n\d+', disk_name) if not match: - print(f"[v0] Could not extract controller from {disk_name}") + # print(f"[v0] Could not extract controller from {disk_name}") + pass return pcie_info controller = match.group(1) # nvme0n1 -> nvme0 - print(f"[v0] Getting PCIe info for {disk_name}, controller: {controller}") + # print(f"[v0] Getting PCIe info for {disk_name}, controller: {controller}") + pass # Path to PCIe device in sysfs sys_path = f'/sys/class/nvme/{controller}/device' - print(f"[v0] Checking sys_path: {sys_path}, exists: {os.path.exists(sys_path)}") + # print(f"[v0] Checking sys_path: {sys_path}, exists: {os.path.exists(sys_path)}") + pass if os.path.exists(sys_path): try: pci_address = os.path.basename(os.readlink(sys_path)) - print(f"[v0] PCI address for {disk_name}: {pci_address}") + # print(f"[v0] PCI address for {disk_name}: {pci_address}") + pass # Use lspci to get detailed PCIe information result = subprocess.run(['lspci', '-vvv', '-s', pci_address], capture_output=True, text=True, timeout=5) if result.returncode == 0: - print(f"[v0] lspci output for {pci_address}:") + # print(f"[v0] lspci output for {pci_address}:") + pass for line in result.stdout.split('\n'): # Look for "LnkSta:" line which shows current link status if 'LnkSta:' in line: - print(f"[v0] Found LnkSta: {line}") + # print(f"[v0] Found LnkSta: {line}") + pass # Example: "LnkSta: Speed 8GT/s, Width x4" if 'Speed' in line: speed_match = re.search(r'Speed\s+([\d.]+)GT/s', line) @@ -990,17 +996,20 @@ def get_pcie_link_speed(disk_name): pcie_info['pcie_gen'] = '4.0' else: pcie_info['pcie_gen'] = '5.0' - print(f"[v0] Current PCIe gen: {pcie_info['pcie_gen']}") + # print(f"[v0] Current PCIe gen: {pcie_info['pcie_gen']}") + pass if 'Width' in line: width_match = re.search(r'Width\s+x(\d+)', line) if width_match: pcie_info['pcie_width'] = f'x{width_match.group(1)}' - print(f"[v0] Current PCIe width: {pcie_info['pcie_width']}") + # print(f"[v0] Current PCIe width: {pcie_info['pcie_width']}") + pass # Look for "LnkCap:" line which shows maximum capabilities elif 'LnkCap:' in line: - print(f"[v0] Found LnkCap: {line}") + # print(f"[v0] Found LnkCap: {line}") + pass if 'Speed' in line: speed_match = re.search(r'Speed\s+([\d.]+)GT/s', 
line) if speed_match: @@ -1015,39 +1024,48 @@ def get_pcie_link_speed(disk_name): pcie_info['pcie_max_gen'] = '4.0' else: pcie_info['pcie_max_gen'] = '5.0' - print(f"[v0] Max PCIe gen: {pcie_info['pcie_max_gen']}") + # print(f"[v0] Max PCIe gen: {pcie_info['pcie_max_gen']}") + pass if 'Width' in line: width_match = re.search(r'Width\s+x(\d+)', line) if width_match: pcie_info['pcie_max_width'] = f'x{width_match.group(1)}' - print(f"[v0] Max PCIe width: {pcie_info['pcie_max_width']}") + # print(f"[v0] Max PCIe width: {pcie_info['pcie_max_width']}") + pass else: - print(f"[v0] lspci failed with return code: {result.returncode}") + # print(f"[v0] lspci failed with return code: {result.returncode}") + pass except Exception as e: - print(f"[v0] Error getting PCIe info via lspci: {e}") + # print(f"[v0] Error getting PCIe info via lspci: {e}") + pass import traceback traceback.print_exc() else: - print(f"[v0] sys_path does not exist: {sys_path}") + # print(f"[v0] sys_path does not exist: {sys_path}") + pass alt_sys_path = f'/sys/block/{disk_name}/device/device' - print(f"[v0] Trying alternative path: {alt_sys_path}, exists: {os.path.exists(alt_sys_path)}") + # print(f"[v0] Trying alternative path: {alt_sys_path}, exists: {os.path.exists(alt_sys_path)}") + pass if os.path.exists(alt_sys_path): try: # Get PCI address from the alternative path pci_address = os.path.basename(os.readlink(alt_sys_path)) - print(f"[v0] PCI address from alt path for {disk_name}: {pci_address}") + # print(f"[v0] PCI address from alt path for {disk_name}: {pci_address}") + pass # Use lspci to get detailed PCIe information result = subprocess.run(['lspci', '-vvv', '-s', pci_address], capture_output=True, text=True, timeout=5) if result.returncode == 0: - print(f"[v0] lspci output for {pci_address} (from alt path):") + # print(f"[v0] lspci output for {pci_address} (from alt path):") + pass for line in result.stdout.split('\n'): # Look for "LnkSta:" line which shows current link status if 'LnkSta:' in line: - print(f"[v0] Found LnkSta: {line}") + # print(f"[v0] Found LnkSta: {line}") + pass if 'Speed' in line: speed_match = re.search(r'Speed\s+([\d.]+)GT/s', line) if speed_match: @@ -1062,17 +1080,20 @@ def get_pcie_link_speed(disk_name): pcie_info['pcie_gen'] = '4.0' else: pcie_info['pcie_gen'] = '5.0' - print(f"[v0] Current PCIe gen: {pcie_info['pcie_gen']}") + # print(f"[v0] Current PCIe gen: {pcie_info['pcie_gen']}") + pass if 'Width' in line: width_match = re.search(r'Width\s+x(\d+)', line) if width_match: pcie_info['pcie_width'] = f'x{width_match.group(1)}' - print(f"[v0] Current PCIe width: {pcie_info['pcie_width']}") + # print(f"[v0] Current PCIe width: {pcie_info['pcie_width']}") + pass # Look for "LnkCap:" line which shows maximum capabilities elif 'LnkCap:' in line: - print(f"[v0] Found LnkCap: {line}") + # print(f"[v0] Found LnkCap: {line}") + pass if 'Speed' in line: speed_match = re.search(r'Speed\s+([\d.]+)GT/s', line) if speed_match: @@ -1087,26 +1108,32 @@ def get_pcie_link_speed(disk_name): pcie_info['pcie_max_gen'] = '4.0' else: pcie_info['pcie_max_gen'] = '5.0' - print(f"[v0] Max PCIe gen: {pcie_info['pcie_max_gen']}") + # print(f"[v0] Max PCIe gen: {pcie_info['pcie_max_gen']}") + pass if 'Width' in line: width_match = re.search(r'Width\s+x(\d+)', line) if width_match: pcie_info['pcie_max_width'] = f'x{width_match.group(1)}' - print(f"[v0] Max PCIe width: {pcie_info['pcie_max_width']}") + # print(f"[v0] Max PCIe width: {pcie_info['pcie_max_width']}") + pass else: - print(f"[v0] lspci failed with return 
code: {result.returncode}") + # print(f"[v0] lspci failed with return code: {result.returncode}") + pass except Exception as e: - print(f"[v0] Error getting PCIe info from alt path: {e}") + # print(f"[v0] Error getting PCIe info from alt path: {e}") + pass import traceback traceback.print_exc() except Exception as e: - print(f"[v0] Error in get_pcie_link_speed for {disk_name}: {e}") + # print(f"[v0] Error in get_pcie_link_speed for {disk_name}: {e}") + pass import traceback traceback.print_exc() - print(f"[v0] Final PCIe info for {disk_name}: {pcie_info}") + # print(f"[v0] Final PCIe info for {disk_name}: {pcie_info}") + pass return pcie_info # get_pcie_link_speed function definition ends here @@ -5397,7 +5424,7 @@ def api_health(): return jsonify({ 'status': 'healthy', 'timestamp': datetime.now().isoformat(), - 'version': '1.0.0' + 'version': '1.0.1' }) @app.route('/api/prometheus', methods=['GET']) @@ -5655,57 +5682,6 @@ def api_prometheus(): traceback.print_exc() return f'# Error generating metrics: {str(e)}\n', 500, {'Content-Type': 'text/plain; charset=utf-8'} -@app.route('/api/system-info', methods=['GET']) -def api_system_info(): - """Get system and node information for dashboard header""" - try: - hostname = socket.gethostname() - node_id = f"pve-{hostname}" - pve_version = None - - # Try to get Proxmox version - try: - result = subprocess.run(['pveversion'], capture_output=True, text=True, timeout=5) - if result.returncode == 0: - pve_version = result.stdout.strip().split('\n')[0] - except: - pass - - # Try to get node info from Proxmox API - try: - result = subprocess.run(['pvesh', 'get', '/nodes', '--output-format', 'json'], - capture_output=True, text=True, timeout=5) - if result.returncode == 0: - nodes = json.loads(result.stdout) - if nodes and len(nodes) > 0: - node_info = nodes[0] - node_id = node_info.get('node', node_id) - hostname = node_info.get('node', hostname) - except: - pass - - response = { - 'hostname': hostname, - 'node_id': node_id, - 'status': 'online', - 'timestamp': datetime.now().isoformat() - } - - if pve_version: - response['pve_version'] = pve_version - else: - response['error'] = 'Proxmox version not available - pveversion command not found' - - return jsonify(response) - except Exception as e: - # print(f"Error getting system info: {e}") - pass - return jsonify({ - 'error': f'Unable to access system information: {str(e)}', - 'hostname': socket.gethostname(), - 'status': 'error', - 'timestamp': datetime.now().isoformat() - }) @app.route('/api/info', methods=['GET']) def api_info(): diff --git a/AppImage/scripts/health_monitor.py b/AppImage/scripts/health_monitor.py index 7eea8f1..8a175a3 100644 --- a/AppImage/scripts/health_monitor.py +++ b/AppImage/scripts/health_monitor.py @@ -4,7 +4,7 @@ Provides comprehensive, lightweight health checks for Proxmox systems. Optimized for minimal system impact with intelligent thresholds and hysteresis. Author: MacRimi -Version: 1.0 (Light Health Logic) +Version: 1.1 (Optimized for minimal overhead) """ import psutil @@ -12,7 +12,7 @@ import subprocess import json import time import os -from typing import Dict, List, Any, Tuple +from typing import Dict, List, Any, Tuple, Optional from datetime import datetime, timedelta from collections import defaultdict @@ -20,23 +20,24 @@ class HealthMonitor: """ Monitors system health across multiple components with minimal impact. Implements hysteresis, intelligent caching, and progressive escalation. + Only reports problems, not verbose OK statuses. 
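    Example (an illustrative, self-contained sketch of the hysteresis idea;
    the thresholds and the two-reading rule mirror the constants below,
    while the function name and sample readings are made up):

        from collections import deque

        WARNING_AT, RECOVER_AT = 85, 75

        def status_with_hysteresis(readings, window=5):
            # Flip to WARNING only after sustained high readings and
            # hold OK through brief spikes -- this prevents flapping.
            recent = deque(readings, maxlen=window)
            high = sum(1 for r in recent if r >= WARNING_AT)
            recovered = sum(1 for r in recent if r <= RECOVER_AT)
            return 'WARNING' if high >= 2 and recovered < 2 else 'OK'

        print(status_with_hysteresis([88, 91, 93, 89]))  # WARNING (sustained)
        print(status_with_hysteresis([70, 96, 70, 71]))  # OK (single spike)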
""" # CPU Thresholds CPU_WARNING = 85 CPU_CRITICAL = 95 CPU_RECOVERY = 75 - CPU_WARNING_DURATION = 60 # seconds - CPU_CRITICAL_DURATION = 120 # seconds - CPU_RECOVERY_DURATION = 120 # seconds + CPU_WARNING_DURATION = 60 + CPU_CRITICAL_DURATION = 120 + CPU_RECOVERY_DURATION = 120 # Memory Thresholds MEMORY_WARNING = 85 MEMORY_CRITICAL = 95 - MEMORY_DURATION = 60 # seconds - SWAP_WARNING_DURATION = 300 # 5 minutes - SWAP_CRITICAL_PERCENT = 5 # 5% of RAM - SWAP_CRITICAL_DURATION = 120 # 2 minutes + MEMORY_DURATION = 60 + SWAP_WARNING_DURATION = 300 + SWAP_CRITICAL_PERCENT = 5 + SWAP_CRITICAL_DURATION = 120 # Storage Thresholds STORAGE_WARNING = 85 @@ -47,23 +48,28 @@ class HealthMonitor: TEMP_CRITICAL = 90 # Network Thresholds - NETWORK_LATENCY_WARNING = 100 # ms - NETWORK_LATENCY_CRITICAL = 300 # ms - NETWORK_TIMEOUT = 0.9 # seconds - NETWORK_INACTIVE_DURATION = 600 # 10 minutes + NETWORK_LATENCY_WARNING = 100 + NETWORK_LATENCY_CRITICAL = 300 + NETWORK_TIMEOUT = 0.9 + NETWORK_INACTIVE_DURATION = 600 # Log Thresholds LOG_ERRORS_WARNING = 5 - LOG_ERRORS_CRITICAL = 6 - LOG_WARNINGS_WARNING = 10 + LOG_ERRORS_CRITICAL = 10 + LOG_WARNINGS_WARNING = 15 LOG_WARNINGS_CRITICAL = 30 - LOG_CHECK_INTERVAL = 300 # 5 minutes + LOG_CHECK_INTERVAL = 300 + + # Updates Thresholds + UPDATES_WARNING = 10 + UPDATES_CRITICAL = 30 # Critical keywords for immediate escalation CRITICAL_LOG_KEYWORDS = [ 'I/O error', 'EXT4-fs error', 'XFS', 'LVM activation failed', 'md/raid: device failed', 'Out of memory', 'kernel panic', - 'filesystem read-only', 'cannot mount' + 'filesystem read-only', 'cannot mount', 'failed to start', + 'task hung', 'oom_kill' ] # PVE Critical Services @@ -71,12 +77,81 @@ class HealthMonitor: def __init__(self): """Initialize health monitor with state tracking""" - self.state_history = defaultdict(list) # For hysteresis - self.last_check_times = {} # Cache check times - self.cached_results = {} # Cache results - self.network_baseline = {} # Network traffic baseline - self.io_error_history = defaultdict(list) # I/O error tracking + self.state_history = defaultdict(list) + self.last_check_times = {} + self.cached_results = {} + self.network_baseline = {} + self.io_error_history = defaultdict(list) + self.failed_vm_history = set() # Track VMs that failed to start + def get_system_info(self) -> Dict[str, Any]: + """ + Get lightweight system info for header display. + Returns: hostname, uptime, and cached health status. + This is extremely lightweight and uses cached health status. 
+ """ + try: + # Get hostname + hostname = os.uname().nodename + + # Get uptime (very cheap operation) + uptime_seconds = time.time() - psutil.boot_time() + + # Get cached health status (no expensive checks) + health_status = self.get_cached_health_status() + + return { + 'hostname': hostname, + 'uptime_seconds': int(uptime_seconds), + 'uptime_formatted': self._format_uptime(uptime_seconds), + 'health': health_status, + 'timestamp': datetime.now().isoformat() + } + except Exception as e: + return { + 'hostname': 'unknown', + 'uptime_seconds': 0, + 'uptime_formatted': 'Unknown', + 'health': {'status': 'UNKNOWN', 'summary': f'Error: {str(e)}'}, + 'timestamp': datetime.now().isoformat() + } + + def _format_uptime(self, seconds: float) -> str: + """Format uptime in human-readable format""" + days = int(seconds // 86400) + hours = int((seconds % 86400) // 3600) + minutes = int((seconds % 3600) // 60) + + if days > 0: + return f"{days}d {hours}h {minutes}m" + elif hours > 0: + return f"{hours}h {minutes}m" + else: + return f"{minutes}m" + + def get_cached_health_status(self) -> Dict[str, str]: + """ + Get cached health status without running expensive checks. + Returns the last calculated status or triggers a check if too old. + """ + cache_key = 'overall_health' + current_time = time.time() + + # If cache exists and is less than 60 seconds old, return it + if cache_key in self.last_check_times: + if current_time - self.last_check_times[cache_key] < 60: + return self.cached_results.get(cache_key, {'status': 'OK', 'summary': 'System operational'}) + + # Otherwise, calculate and cache + status = self.get_overall_status() + self.cached_results[cache_key] = { + 'status': status['status'], + 'summary': status['summary'] + } + self.last_check_times[cache_key] = current_time + + return self.cached_results[cache_key] + def get_overall_status(self) -> Dict[str, Any]: """Get overall health status summary with minimal overhead""" details = self.get_detailed_status() @@ -112,88 +187,99 @@ class HealthMonitor: """ Get comprehensive health status with all checks. Returns JSON structure matching the specification. + OPTIMIZED: Only shows problems, not verbose OK messages. 
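    Example (a fabricated payload illustrating the contract: healthy
    subsystems are omitted from `details`, so a consumer may treat any
    key that is present as actionable):

        sample = {
            "status": "CRITICAL",
            "summary": "vmbr0 is DOWN",
            "details": {
                "network": {"status": "CRITICAL", "reason": "vmbr0 is DOWN"},
                # no "cpu", "memory", "logs", ... keys: those checks were OK
            },
        }

        actionable = {name: check["reason"]
                      for name, check in sample["details"].items()}
        print(actionable)  # {'network': 'vmbr0 is DOWN'}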
""" details = {} critical_issues = [] warning_issues = [] - # Priority 1: Services PVE / FS / Storage + # Priority 1: Services PVE services_status = self._check_pve_services() - details['services'] = services_status - if services_status['status'] == 'CRITICAL': - critical_issues.append(services_status.get('reason', 'Service failure')) - elif services_status['status'] == 'WARNING': - warning_issues.append(services_status.get('reason', 'Service issue')) + if services_status['status'] != 'OK': + details['services'] = services_status + if services_status['status'] == 'CRITICAL': + critical_issues.append(services_status.get('reason', 'Service failure')) + elif services_status['status'] == 'WARNING': + warning_issues.append(services_status.get('reason', 'Service issue')) - storage_status = self._check_storage_comprehensive() - details['storage'] = storage_status - for storage_name, storage_data in storage_status.items(): - if isinstance(storage_data, dict): - if storage_data.get('status') == 'CRITICAL': - critical_issues.append(f"{storage_name}: {storage_data.get('reason', 'Storage failure')}") - elif storage_data.get('status') == 'WARNING': - warning_issues.append(f"{storage_name}: {storage_data.get('reason', 'Storage issue')}") + storage_status = self._check_storage_optimized() + if storage_status and storage_status.get('status') != 'OK': + details['storage'] = storage_status + if storage_status.get('status') == 'CRITICAL': + critical_issues.append(storage_status.get('reason', 'Storage failure')) + elif storage_status.get('status') == 'WARNING': + warning_issues.append(storage_status.get('reason', 'Storage issue')) - # Priority 2: Disks / I/O - disks_status = self._check_disks_io() - details['disks'] = disks_status - for disk_name, disk_data in disks_status.items(): - if isinstance(disk_data, dict): - if disk_data.get('status') == 'CRITICAL': - critical_issues.append(f"{disk_name}: {disk_data.get('reason', 'Disk failure')}") - elif disk_data.get('status') == 'WARNING': - warning_issues.append(f"{disk_name}: {disk_data.get('reason', 'Disk issue')}") + disks_status = self._check_disks_optimized() + if disks_status and disks_status.get('status') != 'OK': + details['disks'] = disks_status + if disks_status.get('status') == 'CRITICAL': + critical_issues.append(disks_status.get('reason', 'Disk failure')) + elif disks_status.get('status') == 'WARNING': + warning_issues.append(disks_status.get('reason', 'Disk issue')) - # Priority 3: VM/CT - vms_status = self._check_vms_cts() - details['vms'] = vms_status - if vms_status.get('status') == 'CRITICAL': - critical_issues.append(vms_status.get('reason', 'VM/CT failure')) - elif vms_status.get('status') == 'WARNING': - warning_issues.append(vms_status.get('reason', 'VM/CT issue')) + vms_status = self._check_vms_cts_optimized() + if vms_status and vms_status.get('status') != 'OK': + details['vms'] = vms_status + if vms_status.get('status') == 'CRITICAL': + critical_issues.append(vms_status.get('reason', 'VM/CT failure')) + elif vms_status.get('status') == 'WARNING': + warning_issues.append(vms_status.get('reason', 'VM/CT issue')) - # Priority 4: Network - network_status = self._check_network_comprehensive() - details['network'] = network_status - if network_status.get('status') == 'CRITICAL': - critical_issues.append(network_status.get('reason', 'Network failure')) - elif network_status.get('status') == 'WARNING': - warning_issues.append(network_status.get('reason', 'Network issue')) + network_status = self._check_network_optimized() + if network_status and 
network_status.get('status') != 'OK': + details['network'] = network_status + if network_status.get('status') == 'CRITICAL': + critical_issues.append(network_status.get('reason', 'Network failure')) + elif network_status.get('status') == 'WARNING': + warning_issues.append(network_status.get('reason', 'Network issue')) - # Priority 5: CPU/RAM + # Priority 5: CPU/RAM (only if there are problems) cpu_status = self._check_cpu_with_hysteresis() - details['cpu'] = cpu_status - if cpu_status.get('status') == 'WARNING': - warning_issues.append(cpu_status.get('reason', 'CPU high')) + if cpu_status.get('status') != 'OK': + details['cpu'] = cpu_status + if cpu_status.get('status') == 'WARNING': + warning_issues.append(cpu_status.get('reason', 'CPU high')) + elif cpu_status.get('status') == 'CRITICAL': + critical_issues.append(cpu_status.get('reason', 'CPU critical')) memory_status = self._check_memory_comprehensive() - details['memory'] = memory_status - if memory_status.get('status') == 'CRITICAL': - critical_issues.append(memory_status.get('reason', 'Memory critical')) - elif memory_status.get('status') == 'WARNING': - warning_issues.append(memory_status.get('reason', 'Memory high')) + if memory_status.get('status') != 'OK': + details['memory'] = memory_status + if memory_status.get('status') == 'CRITICAL': + critical_issues.append(memory_status.get('reason', 'Memory critical')) + elif memory_status.get('status') == 'WARNING': + warning_issues.append(memory_status.get('reason', 'Memory high')) - # Priority 6: Logs + # Priority 6: Logs (critical errors only) logs_status = self._check_logs_lightweight() - details['logs'] = logs_status - if logs_status.get('status') == 'CRITICAL': - critical_issues.append(logs_status.get('reason', 'Critical log errors')) - elif logs_status.get('status') == 'WARNING': - warning_issues.append(logs_status.get('reason', 'Log warnings')) + if logs_status.get('status') != 'OK': + details['logs'] = logs_status + if logs_status.get('status') == 'CRITICAL': + critical_issues.append(logs_status.get('reason', 'Critical log errors')) + elif logs_status.get('status') == 'WARNING': + warning_issues.append(logs_status.get('reason', 'Log warnings')) - # Priority 7: Extras (Security, Certificates, Uptime) + updates_status = self._check_updates() + if updates_status and updates_status.get('status') != 'OK': + details['updates'] = updates_status + if updates_status.get('status') == 'WARNING': + warning_issues.append(updates_status.get('reason', 'Updates pending')) + + # Priority 7: Security (problems only) security_status = self._check_security() - details['security'] = security_status - if security_status.get('status') == 'WARNING': - warning_issues.append(security_status.get('reason', 'Security issue')) + if security_status.get('status') != 'OK': + details['security'] = security_status + if security_status.get('status') == 'WARNING': + warning_issues.append(security_status.get('reason', 'Security issue')) # Determine overall status if critical_issues: overall = 'CRITICAL' - summary = '; '.join(critical_issues[:3]) # Top 3 critical issues + summary = '; '.join(critical_issues[:3]) elif warning_issues: overall = 'WARNING' - summary = '; '.join(warning_issues[:3]) # Top 3 warnings + summary = '; '.join(warning_issues[:3]) else: overall = 'OK' summary = 'All systems operational' @@ -206,29 +292,22 @@ class HealthMonitor: } def _check_cpu_with_hysteresis(self) -> Dict[str, Any]: - """ - Check CPU with hysteresis to avoid flapping alerts. - Requires sustained high usage before triggering.
- """ + """Check CPU with hysteresis to avoid flapping alerts""" try: - # Get CPU usage (1 second sample to minimize impact) cpu_percent = psutil.cpu_percent(interval=1) current_time = time.time() - # Track state history state_key = 'cpu_usage' self.state_history[state_key].append({ 'value': cpu_percent, 'time': current_time }) - # Keep only recent history (last 5 minutes) self.state_history[state_key] = [ entry for entry in self.state_history[state_key] if current_time - entry['time'] < 300 ] - # Check for sustained high usage critical_duration = sum( 1 for entry in self.state_history[state_key] if entry['value'] >= self.CPU_CRITICAL and @@ -247,8 +326,7 @@ class HealthMonitor: current_time - entry['time'] <= self.CPU_RECOVERY_DURATION ) - # Determine status with hysteresis - if critical_duration >= 2: # 2+ readings in critical range + if critical_duration >= 2: status = 'CRITICAL' reason = f'CPU >{self.CPU_CRITICAL}% for {self.CPU_CRITICAL_DURATION}s' elif warning_duration >= 2 and recovery_duration < 2: @@ -258,7 +336,6 @@ class HealthMonitor: status = 'OK' reason = None - # Get temperature if available (checked once per minute max) temp_status = self._check_cpu_temperature() result = { @@ -270,7 +347,7 @@ class HealthMonitor: if reason: result['reason'] = reason - if temp_status: + if temp_status and temp_status.get('status') != 'UNKNOWN': result['temperature'] = temp_status if temp_status.get('status') == 'CRITICAL': result['status'] = 'CRITICAL' @@ -284,18 +361,16 @@ class HealthMonitor: except Exception as e: return {'status': 'UNKNOWN', 'reason': f'CPU check failed: {str(e)}'} - def _check_cpu_temperature(self) -> Dict[str, Any]: + def _check_cpu_temperature(self) -> Optional[Dict[str, Any]]: """Check CPU temperature (cached, max 1 check per minute)""" cache_key = 'cpu_temp' current_time = time.time() - # Check cache if cache_key in self.last_check_times: if current_time - self.last_check_times[cache_key] < 60: - return self.cached_results.get(cache_key, {}) + return self.cached_results.get(cache_key) try: - # Try lm-sensors first result = subprocess.run( ['sensors', '-A', '-u'], capture_output=True, @@ -338,17 +413,10 @@ class HealthMonitor: self.last_check_times[cache_key] = current_time return temp_result - # If sensors not available, return UNKNOWN (doesn't penalize) - unknown_result = {'status': 'UNKNOWN', 'reason': 'No temperature sensors available'} - self.cached_results[cache_key] = unknown_result - self.last_check_times[cache_key] = current_time - return unknown_result + return None except Exception: - unknown_result = {'status': 'UNKNOWN', 'reason': 'Temperature check unavailable'} - self.cached_results[cache_key] = unknown_result - self.last_check_times[cache_key] = current_time - return unknown_result + return None def _check_memory_comprehensive(self) -> Dict[str, Any]: """Check memory including RAM and swap with sustained thresholds""" @@ -361,7 +429,6 @@ class HealthMonitor: swap_percent = swap.percent if swap.total > 0 else 0 swap_vs_ram = (swap.used / memory.total * 100) if memory.total > 0 else 0 - # Track memory state state_key = 'memory_usage' self.state_history[state_key].append({ 'mem_percent': mem_percent, @@ -370,13 +437,11 @@ class HealthMonitor: 'time': current_time }) - # Keep only recent history self.state_history[state_key] = [ entry for entry in self.state_history[state_key] if current_time - entry['time'] < 600 ] - # Check sustained high memory mem_critical = sum( 1 for entry in self.state_history[state_key] if entry['mem_percent'] >= 
self.MEMORY_CRITICAL and @@ -389,7 +454,6 @@ class HealthMonitor: current_time - entry['time'] <= self.MEMORY_DURATION ) - # Check swap usage swap_critical = sum( 1 for entry in self.state_history[state_key] if entry['swap_vs_ram'] > self.SWAP_CRITICAL_PERCENT and @@ -402,7 +466,6 @@ class HealthMonitor: current_time - entry['time'] <= self.SWAP_WARNING_DURATION ) - # Determine status if mem_critical >= 2: status = 'CRITICAL' reason = f'RAM >{self.MEMORY_CRITICAL}% for {self.MEMORY_DURATION}s' @@ -435,53 +498,60 @@ class HealthMonitor: except Exception as e: return {'status': 'UNKNOWN', 'reason': f'Memory check failed: {str(e)}'} - def _check_storage_comprehensive(self) -> Dict[str, Any]: + def _check_storage_optimized(self) -> Optional[Dict[str, Any]]: """ - Comprehensive storage check including filesystems, mount points, - LVM, and Proxmox storages. + Optimized storage check - only reports problems. + Checks critical mounts, LVM, and Proxmox storages. """ - storage_results = {} + issues = [] + storage_details = {} # Check critical filesystems - critical_mounts = ['/', '/var', '/var/lib/vz'] + critical_mounts = ['/', '/var/lib/vz'] for mount_point in critical_mounts: - if os.path.exists(mount_point): - fs_status = self._check_filesystem(mount_point) - storage_results[mount_point] = fs_status + if not os.path.exists(mount_point): + issues.append(f'{mount_point} not mounted') + storage_details[mount_point] = { + 'status': 'CRITICAL', + 'reason': 'Not mounted' + } + continue + + fs_status = self._check_filesystem(mount_point) + if fs_status['status'] != 'OK': + issues.append(f"{mount_point}: {fs_status['reason']}") + storage_details[mount_point] = fs_status - # Check all mounted filesystems - try: - partitions = psutil.disk_partitions() - for partition in partitions: - if partition.mountpoint not in critical_mounts: - try: - fs_status = self._check_filesystem(partition.mountpoint) - storage_results[partition.mountpoint] = fs_status - except PermissionError: - continue - except Exception as e: - storage_results['partitions_error'] = { - 'status': 'WARNING', - 'reason': f'Could not enumerate partitions: {str(e)}' - } - - # Check LVM (especially local-lvm) + # Check LVM lvm_status = self._check_lvm() - if lvm_status: - storage_results['lvm'] = lvm_status + if lvm_status and lvm_status.get('status') != 'OK': + issues.append(lvm_status.get('reason', 'LVM issue')) + storage_details['lvm'] = lvm_status - # Check Proxmox storages + # Check Proxmox storages (PBS, NFS, etc.) pve_storages = self._check_proxmox_storages() - if pve_storages: - storage_results.update(pve_storages) + for storage_name, storage_data in pve_storages.items(): + if storage_data.get('status') != 'OK': + issues.append(f"{storage_name}: {storage_data.get('reason', 'Storage issue')}") + storage_details[storage_name] = storage_data - return storage_results + # If no issues, report OK + if not issues: + return {'status': 'OK'} + + # Determine overall status + has_critical = any(d.get('status') == 'CRITICAL' for d in storage_details.values()) + + return { + 'status': 'CRITICAL' if has_critical else 'WARNING', + 'reason': '; '.join(issues[:3]), + 'details': storage_details } def _check_filesystem(self, mount_point: str) -> Dict[str, Any]: """Check individual filesystem for space and mount status""" try: - # Check if mounted result = subprocess.run( ['mountpoint', '-q', mount_point], capture_output=True, @@ -491,7 +561,7 @@ class HealthMonitor: if result.returncode != 0: return { 'status': 'CRITICAL', - 'reason': f'Not
mounted' + 'reason': 'Not mounted' } # Check if read-only @@ -506,7 +576,6 @@ class HealthMonitor: 'reason': 'Mounted read-only' } - # Check disk usage usage = psutil.disk_usage(mount_point) percent = usage.percent @@ -522,9 +591,7 @@ class HealthMonitor: result = { 'status': status, - 'usage_percent': round(percent, 1), - 'free_gb': round(usage.free / (1024**3), 2), - 'total_gb': round(usage.total / (1024**3), 2) + 'usage_percent': round(percent, 1) } if reason: @@ -538,7 +605,7 @@ class HealthMonitor: 'reason': f'Check failed: {str(e)}' } - def _check_lvm(self) -> Dict[str, Any]: + def _check_lvm(self) -> Optional[Dict[str, Any]]: """Check LVM volumes, especially local-lvm""" try: result = subprocess.run( @@ -549,10 +616,7 @@ class HealthMonitor: ) if result.returncode != 0: - return { - 'status': 'WARNING', - 'reason': 'LVM not available or no volumes' - } + return None volumes = [] local_lvm_found = False @@ -568,30 +632,22 @@ class HealthMonitor: if 'local-lvm' in lv_name or 'local-lvm' in vg_name: local_lvm_found = True - if not local_lvm_found and volumes: + if volumes and not local_lvm_found: return { 'status': 'CRITICAL', - 'reason': 'local-lvm volume not found', - 'volumes': volumes + 'reason': 'local-lvm volume not found' } - return { - 'status': 'OK', - 'volumes': volumes - } + return {'status': 'OK'} - except Exception as e: - return { - 'status': 'WARNING', - 'reason': f'LVM check failed: {str(e)}' - } + except Exception: + return None def _check_proxmox_storages(self) -> Dict[str, Any]: - """Check Proxmox-specific storages (NFS, CIFS, PBS)""" + """Check Proxmox-specific storages (only report problems)""" storages = {} try: - # Read Proxmox storage configuration if os.path.exists('/etc/pve/storage.cfg'): with open('/etc/pve/storage.cfg', 'r') as f: current_storage = None @@ -609,13 +665,7 @@ class HealthMonitor: path = line.split(None, 1)[1] if storage_type == 'dir': - if os.path.exists(path): - storages[f'storage_{current_storage}'] = { - 'status': 'OK', - 'type': 'dir', - 'path': path - } - else: + if not os.path.exists(path): storages[f'storage_{current_storage}'] = { 'status': 'CRITICAL', 'reason': 'Directory does not exist', @@ -625,21 +675,20 @@ class HealthMonitor: current_storage = None storage_type = None - except Exception as e: - storages['pve_storage_config'] = { - 'status': 'WARNING', - 'reason': f'Could not read storage config: {str(e)}' - } + except Exception: + pass return storages - def _check_disks_io(self) -> Dict[str, Any]: - """Check disk I/O errors from dmesg (lightweight)""" - disks = {} + def _check_disks_optimized(self) -> Optional[Dict[str, Any]]: + """ + Optimized disk check - only reports recent I/O errors from dmesg.
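    Example (a standalone sketch of the sliding-window counting used in
    this method; the dmesg line is fabricated, and the thresholds --
    WARNING at 1 error, CRITICAL at 3 within 5 minutes -- mirror the
    code below):

        import time
        from collections import defaultdict

        WINDOW = 300  # seconds of history to keep
        history = defaultdict(list)

        def record(line, now):
            # Store a timestamp per disk named in a matching error line.
            if any(k in line.lower() for k in ('i/o error', 'ata error', 'scsi error')):
                for part in line.split():
                    if part.startswith(('sd', 'nvme', 'hd')):
                        history[part.rstrip(':,')].append(now)

        def status(disk, now):
            history[disk] = [t for t in history[disk] if now - t < WINDOW]
            count = len(history[disk])
            return 'CRITICAL' if count >= 3 else 'WARNING' if count >= 1 else 'OK'

        now = time.time()
        record("blk_update_request: I/O error, dev sda, sector 2048", now)
        print(status('sda', now))  # WARNING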
+ """ current_time = time.time() + disk_issues = {} try: - # Only check dmesg for recent errors (last 2 seconds of kernel log) + # Check dmesg for I/O errors result = subprocess.run( ['dmesg', '-T', '--level=err,warn', '--since', '5 minutes ago'], capture_output=True, @@ -648,21 +697,15 @@ class HealthMonitor: ) if result.returncode == 0: - io_errors = defaultdict(int) - for line in result.stdout.split('\n'): line_lower = line.lower() if any(keyword in line_lower for keyword in ['i/o error', 'ata error', 'scsi error']): - # Extract disk name for part in line.split(): if part.startswith('sd') or part.startswith('nvme') or part.startswith('hd'): disk_name = part.rstrip(':,') - io_errors[disk_name] += 1 - - # Track in history self.io_error_history[disk_name].append(current_time) - # Clean old history (keep last 5 minutes) + # Clean old history for disk in list(self.io_error_history.keys()): self.io_error_history[disk] = [ t for t in self.io_error_history[disk] @@ -672,130 +715,86 @@ class HealthMonitor: error_count = len(self.io_error_history[disk]) if error_count >= 3: - disks[f'/dev/{disk}'] = { + disk_issues[f'/dev/{disk}'] = { 'status': 'CRITICAL', 'reason': f'{error_count} I/O errors in 5 minutes' } elif error_count >= 1: - disks[f'/dev/{disk}'] = { + disk_issues[f'/dev/{disk}'] = { 'status': 'WARNING', 'reason': f'{error_count} I/O error(s) in 5 minutes' } - # If no errors found, report OK - if not disks: - disks['status'] = 'OK' + # If no issues, return OK + if not disk_issues: + return {'status': 'OK'} - return disks + has_critical = any(d.get('status') == 'CRITICAL' for d in disk_issues.values()) - except Exception as e: return { - 'status': 'WARNING', - 'reason': f'Disk I/O check failed: {str(e)}' + 'status': 'CRITICAL' if has_critical else 'WARNING', + 'reason': f"{len(disk_issues)} disk(s) with errors", + 'details': disk_issues } + + except Exception: + return None - def _check_network_comprehensive(self) -> Dict[str, Any]: - """Check network interfaces, bridges, and connectivity""" + def _check_network_optimized(self) -> Optional[Dict[str, Any]]: + """ + Optimized network check - only reports problems. + Reports downed interfaces and loss of connectivity.
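    Example (a minimal, runnable sketch of the interface check below;
    the interface-name prefixes are the ones this method treats as
    important, and the printed result depends on the host):

        import psutil

        def down_interfaces():
            down = []
            for name, stats in psutil.net_if_stats().items():
                if name == 'lo':
                    continue
                if name.startswith(('vmbr', 'eth', 'ens')) and not stats.isup:
                    down.append(name)
            return down

        print(down_interfaces())  # e.g. ['vmbr1'] when a bridge is down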
+ """ try: issues = [] interface_details = {} - # Check interface status net_if_stats = psutil.net_if_stats() - net_io = psutil.net_io_counters(pernic=True) - current_time = time.time() for interface, stats in net_if_stats.items(): if interface == 'lo': continue - # Check if interface is down (excluding administratively down) + # Check if important interface is down if not stats.isup: - # Check if it's a bridge or important interface if interface.startswith('vmbr') or interface.startswith('eth') or interface.startswith('ens'): issues.append(f'{interface} is DOWN') interface_details[interface] = { 'status': 'CRITICAL', 'reason': 'Interface DOWN' } - continue - - # Check bridge traffic (if no traffic for 10 minutes) - if interface.startswith('vmbr') and interface in net_io: - io_stats = net_io[interface] - - # Initialize baseline if not exists - if interface not in self.network_baseline: - self.network_baseline[interface] = { - 'rx_bytes': io_stats.bytes_recv, - 'tx_bytes': io_stats.bytes_sent, - 'time': current_time - } - else: - baseline = self.network_baseline[interface] - time_diff = current_time - baseline['time'] - - if time_diff >= self.NETWORK_INACTIVE_DURATION: - rx_diff = io_stats.bytes_recv - baseline['rx_bytes'] - tx_diff = io_stats.bytes_sent - baseline['tx_bytes'] - - if rx_diff == 0 and tx_diff == 0: - issues.append(f'{interface} no traffic for 10+ minutes') - interface_details[interface] = { - 'status': 'WARNING', - 'reason': 'No traffic for 10+ minutes' - } - - # Update baseline - self.network_baseline[interface] = { - 'rx_bytes': io_stats.bytes_recv, - 'tx_bytes': io_stats.bytes_sent, - 'time': current_time - } - # Check gateway/DNS latency (lightweight, cached) + # Check connectivity latency_status = self._check_network_latency() - if latency_status.get('status') != 'OK': + if latency_status and latency_status.get('status') not in ['OK', 'UNKNOWN']: issues.append(latency_status.get('reason', 'Network latency issue')) interface_details['connectivity'] = latency_status - # Determine overall network status - if any('CRITICAL' in str(detail.get('status')) for detail in interface_details.values()): - status = 'CRITICAL' - reason = '; '.join(issues[:2]) - elif issues: - status = 'WARNING' - reason = '; '.join(issues[:2]) - else: - status = 'OK' - reason = None + # If no issues, return OK + if not issues: + return {'status': 'OK'} - result = {'status': status} - if reason: - result['reason'] = reason - if interface_details: - result['interfaces'] = interface_details + has_critical = any(d.get('status') == 'CRITICAL' for d in interface_details.values()) - return result - - except Exception as e: return { - 'status': 'WARNING', - 'reason': f'Network check failed: {str(e)}' + 'status': 'CRITICAL' if has_critical else 'WARNING', + 'reason': '; '.join(issues[:2]), + 'details': interface_details } + + except Exception: + return None - def _check_network_latency(self) -> Dict[str, Any]: - """Check network latency to gateway/DNS (cached, max 1 check per minute)""" + def _check_network_latency(self) -> Optional[Dict[str, Any]]: + """Check network latency to 1.1.1.1 (cached)""" cache_key = 'network_latency' current_time = time.time() - # Check cache if cache_key in self.last_check_times: if current_time - self.last_check_times[cache_key] < 60: - return self.cached_results.get(cache_key, {'status': 'OK'}) + return self.cached_results.get(cache_key) try: - # Ping default gateway or 1.1.1.1 result = subprocess.run( ['ping', '-c', '1', '-W', '1', '1.1.1.1'], capture_output=True, @@ -804,7 
+803,6 @@ class HealthMonitor: ) if result.returncode == 0: - # Extract latency for line in result.stdout.split('\n'): if 'time=' in line: try: @@ -834,7 +832,6 @@ class HealthMonitor: except: pass - # Ping failed packet_loss_result = { 'status': 'CRITICAL', 'reason': 'Packet loss or timeout' @@ -843,101 +840,58 @@ class HealthMonitor: self.last_check_times[cache_key] = current_time return packet_loss_result - except Exception as e: - error_result = { - 'status': 'WARNING', - 'reason': f'Latency check failed: {str(e)}' - } - self.cached_results[cache_key] = error_result - self.last_check_times[cache_key] = current_time - return error_result + except Exception: + return None - def _check_vms_cts(self) -> Dict[str, Any]: - """Check VM and CT status for unexpected stops""" + def _check_vms_cts_optimized(self) -> Optional[Dict[str, Any]]: + """ + Optimized VM/CT check - only reports failed starts. + Checks logs for VMs/CTs that failed to start. + """ try: issues = [] vm_details = {} - # Check VMs - try: - result = subprocess.run( - ['qm', 'list'], - capture_output=True, - text=True, - timeout=3 - ) - - if result.returncode == 0: - for line in result.stdout.strip().split('\n')[1:]: - if line.strip(): - parts = line.split() - if len(parts) >= 3: - vmid = parts[0] - vm_status = parts[2] - - if vm_status == 'stopped': - # Check if unexpected (this is simplified, would need autostart config) - vm_details[f'vm_{vmid}'] = { - 'status': 'WARNING', - 'reason': 'VM stopped' + # Check logs for failed VM/CT starts + result = subprocess.run( + ['journalctl', '--since', '10 minutes ago', '--no-pager', '-u', 'pve*'], + capture_output=True, + text=True, + timeout=3 + ) + + if result.returncode == 0: + for line in result.stdout.split('\n'): + line_lower = line.lower() + + # Detect VM/CT start failures + if 'failed to start' in line_lower or 'error starting' in line_lower or \ + 'start error' in line_lower or 'cannot start' in line_lower: + # Extract VM/CT ID + for word in line.split(): + if word.isdigit() and len(word) <= 4: + vmid = word + if vmid not in self.failed_vm_history: + self.failed_vm_history.add(vmid) + issues.append(f'VM/CT {vmid} failed to start') + vm_details[f'vmct_{vmid}'] = { + 'status': 'CRITICAL', + 'reason': 'Failed to start' } - issues.append(f'VM {vmid} stopped') - except Exception as e: - vm_details['vms_check'] = { - 'status': 'WARNING', - 'reason': f'Could not check VMs: {str(e)}' - } + break - # Check CTs - try: - result = subprocess.run( - ['pct', 'list'], - capture_output=True, - text=True, - timeout=3 - ) - - if result.returncode == 0: - for line in result.stdout.strip().split('\n')[1:]: - if line.strip(): - parts = line.split() - if len(parts) >= 2: - ctid = parts[0] - ct_status = parts[1] - - if ct_status == 'stopped': - vm_details[f'ct_{ctid}'] = { - 'status': 'WARNING', - 'reason': 'CT stopped' - } - issues.append(f'CT {ctid} stopped') - except Exception as e: - vm_details['cts_check'] = { - 'status': 'WARNING', - 'reason': f'Could not check CTs: {str(e)}' - } + # If no issues, return OK + if not issues: + return {'status': 'OK'} - # Determine overall status - if issues: - status = 'WARNING' - reason = '; '.join(issues[:3]) - else: - status = 'OK' - reason = None - - result = {'status': status} - if reason: - result['reason'] = reason - if vm_details: - result['details'] = vm_details - - return result - - except Exception as e: return { - 'status': 'WARNING', - 'reason': f'VM/CT check failed: {str(e)}' + 'status': 'CRITICAL', + 'reason': '; '.join(issues[:3]), + 'details': 
vm_details } + + except Exception: + return None def _check_pve_services(self) -> Dict[str, Any]: """Check critical Proxmox services""" @@ -978,13 +932,11 @@ class HealthMonitor: cache_key = 'logs_analysis' current_time = time.time() - # Check cache if cache_key in self.last_check_times: if current_time - self.last_check_times[cache_key] < self.LOG_CHECK_INTERVAL: return self.cached_results.get(cache_key, {'status': 'OK'}) try: - # Check journalctl for recent errors and warnings result = subprocess.run( ['journalctl', '--since', '5 minutes ago', '--no-pager', '-p', 'warning'], capture_output=True, @@ -1002,29 +954,26 @@ class HealthMonitor: for line in lines: line_lower = line.lower() - # Check for critical keywords for keyword in self.CRITICAL_LOG_KEYWORDS: if keyword.lower() in line_lower: critical_keywords_found.append(keyword) errors_5m += 1 break else: - # Count errors and warnings if 'error' in line_lower or 'critical' in line_lower or 'fatal' in line_lower: errors_5m += 1 elif 'warning' in line_lower or 'warn' in line_lower: warnings_5m += 1 - # Determine status if critical_keywords_found: status = 'CRITICAL' reason = f'Critical errors: {", ".join(set(critical_keywords_found[:3]))}' elif errors_5m >= self.LOG_ERRORS_CRITICAL: status = 'CRITICAL' - reason = f'{errors_5m} errors in 5 minutes (≥{self.LOG_ERRORS_CRITICAL})' + reason = f'{errors_5m} errors in 5 minutes' elif warnings_5m >= self.LOG_WARNINGS_CRITICAL: - status = 'CRITICAL' - reason = f'{warnings_5m} warnings in 5 minutes (≥{self.LOG_WARNINGS_CRITICAL})' + status = 'WARNING' + reason = f'{warnings_5m} warnings in 5 minutes' elif errors_5m >= self.LOG_ERRORS_WARNING: status = 'WARNING' reason = f'{errors_5m} errors in 5 minutes' @@ -1035,11 +984,7 @@ class HealthMonitor: status = 'OK' reason = None - log_result = { - 'status': status, - 'errors_5m': errors_5m, - 'warnings_5m': warnings_5m - } + log_result = {'status': status} if reason: log_result['reason'] = reason @@ -1052,34 +997,63 @@ class HealthMonitor: self.last_check_times[cache_key] = current_time return ok_result - except Exception as e: - error_result = { - 'status': 'WARNING', - 'reason': f'Log check failed: {str(e)}' - } - self.cached_results[cache_key] = error_result - self.last_check_times[cache_key] = current_time - return error_result + except Exception: + return {'status': 'OK'} + + def _check_updates(self) -> Optional[Dict[str, Any]]: + """Check for pending system updates (cached, checked every 10 minutes)""" + cache_key = 'updates_check' + current_time = time.time() + + if cache_key in self.last_check_times: + if current_time - self.last_check_times[cache_key] < 600: + return self.cached_results.get(cache_key) + + try: + # Check apt updates + result = subprocess.run( + ['apt', 'list', '--upgradable'], + capture_output=True, + text=True, + timeout=5 + ) + + if result.returncode == 0: + lines = result.stdout.strip().split('\n') + # First line is header + update_count = len([l for l in lines if l and not l.startswith('Listing')]) + + if update_count >= self.UPDATES_CRITICAL: + status = 'WARNING' + reason = f'{update_count} updates pending (≥{self.UPDATES_CRITICAL})' + elif update_count >= self.UPDATES_WARNING: + status = 'WARNING' + reason = f'{update_count} updates pending' + else: + status = 'OK' + reason = None + + update_result = { + 'status': status, + 'count': update_count + } + if reason: + update_result['reason'] = reason + + self.cached_results[cache_key] = update_result + self.last_check_times[cache_key] = current_time + return update_result + + 
return None + + except Exception: + return None def _check_security(self) -> Dict[str, Any]: - """Check security-related items (fail2ban, certificates, uptime)""" + """Check security-related items (certificates, uptime)""" try: issues = [] - # Check fail2ban - try: - result = subprocess.run( - ['systemctl', 'is-active', 'fail2ban'], - capture_output=True, - text=True, - timeout=2 - ) - - if result.returncode != 0 or result.stdout.strip() != 'active': - issues.append('fail2ban inactive') - except Exception: - pass - # Check uptime (warning if >180 days) try: uptime_seconds = time.time() - psutil.boot_time() @@ -1090,9 +1064,9 @@ class HealthMonitor: except Exception: pass - # Check SSL certificates (cached, checked once per day) + # Check SSL certificates cert_status = self._check_certificates() - if cert_status.get('status') != 'OK': + if cert_status and cert_status.get('status') != 'OK': issues.append(cert_status.get('reason', 'Certificate issue')) if issues: @@ -1103,24 +1077,19 @@ class HealthMonitor: return {'status': 'OK'} - except Exception as e: - return { - 'status': 'WARNING', - 'reason': f'Security check failed: {str(e)}' - } + except Exception: + return {'status': 'OK'} - def _check_certificates(self) -> Dict[str, Any]: + def _check_certificates(self) -> Optional[Dict[str, Any]]: """Check SSL certificate expiration (cached, checked once per day)""" cache_key = 'certificates' current_time = time.time() - # Check cache (24 hours) if cache_key in self.last_check_times: if current_time - self.last_check_times[cache_key] < 86400: - return self.cached_results.get(cache_key, {'status': 'OK'}) + return self.cached_results.get(cache_key) try: - # Check PVE certificate cert_path = '/etc/pve/local/pve-ssl.pem' if os.path.exists(cert_path): @@ -1132,7 +1101,6 @@ class HealthMonitor: ) if result.returncode == 0: - # Parse expiration date date_str = result.stdout.strip().replace('notAfter=', '') try: @@ -1160,16 +1128,10 @@ class HealthMonitor: except Exception: pass - ok_result = {'status': 'OK'} - self.cached_results[cache_key] = ok_result - self.last_check_times[cache_key] = current_time - return ok_result + return None except Exception: - ok_result = {'status': 'OK'} - self.cached_results[cache_key] = ok_result - self.last_check_times[cache_key] = current_time - return ok_result + return None # Global instance diff --git a/scripts/test/Heriberto.AppImage b/scripts/test/Heriberto.AppImage deleted file mode 100755 index 718ab1b..0000000 Binary files a/scripts/test/Heriberto.AppImage and /dev/null differ diff --git a/scripts/test/ProxMenux-beta3.AppImage b/scripts/test/ProxMenux-beta3.AppImage deleted file mode 100755 index 9c1f1ae..0000000 Binary files a/scripts/test/ProxMenux-beta3.AppImage and /dev/null differ diff --git a/scripts/test/ProxMenux-beta4.AppImage b/scripts/test/ProxMenux-beta4.AppImage deleted file mode 100755 index 0f0daa5..0000000 Binary files a/scripts/test/ProxMenux-beta4.AppImage and /dev/null differ diff --git a/scripts/test/ProxMenux-beta5.AppImage b/scripts/test/ProxMenux-beta5.AppImage deleted file mode 100644 index 45979dd..0000000 Binary files a/scripts/test/ProxMenux-beta5.AppImage and /dev/null differ diff --git a/scripts/test/ProxMenux-beta6.AppImage b/scripts/test/ProxMenux-beta6.AppImage deleted file mode 100755 index 1efdb3e..0000000 Binary files a/scripts/test/ProxMenux-beta6.AppImage and /dev/null differ diff --git a/scripts/test/ProxMenux-beta7.AppImage b/scripts/test/ProxMenux-beta7.AppImage deleted file mode 100755 index 3b5c045..0000000 Binary files 
a/scripts/test/ProxMenux-beta7.AppImage and /dev/null differ diff --git a/scripts/test/ProxMenux-rc.AppImage b/scripts/test/ProxMenux-rc.AppImage deleted file mode 100755 index 19bdb74..0000000 Binary files a/scripts/test/ProxMenux-rc.AppImage and /dev/null differ diff --git a/scripts/test/ProxMenux-rc2.AppImage b/scripts/test/ProxMenux-rc2.AppImage deleted file mode 100755 index 718ab1b..0000000 Binary files a/scripts/test/ProxMenux-rc2.AppImage and /dev/null differ diff --git a/scripts/test/rafa.AppImage b/scripts/test/rafa.AppImage deleted file mode 100755 index 8b6a021..0000000 Binary files a/scripts/test/rafa.AppImage and /dev/null differ diff --git a/scripts/vm/select_nas_iso.sh b/scripts/vm/select_nas_iso.sh index 160c4ff..1009c35 100644 --- a/scripts/vm/select_nas_iso.sh +++ b/scripts/vm/select_nas_iso.sh @@ -89,9 +89,9 @@ function select_nas_iso() { HN="OpenMediaVault" ;; 5) - ISO_NAME="XigmaNAS-13.3.0.5" - ISO_URL="https://sourceforge.net/projects/xigmanas/files/XigmaNAS-13.3.0.5/13.3.0.5.10153/XigmaNAS-x64-LiveCD-13.3.0.5.10153.iso/download" - ISO_FILE="XigmaNAS-x64-LiveCD-13.3.0.5.10153.iso" + ISO_NAME="XigmaNAS-14.3.0.5" + ISO_URL="https://sourceforge.net/projects/xigmanas/files/XigmaNAS-14.3.0.5/14.3.0.5.10566/XigmaNAS-x64-LiveCD-14.3.0.5.10566.iso/download" + ISO_FILE="XigmaNAS-x64-LiveCD-14.3.0.5.10566.iso" ISO_PATH="$ISO_DIR/$ISO_FILE" HN="XigmaNAS" ;;
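Since the XigmaNAS entry above only swaps the ISO version, URL, and filename, a quick way to sanity-check that the new SourceForge link still resolves might look like the following (a hypothetical one-off helper, not part of the repository; the URL is copied from the diff, and Python is used for consistency with the other examples):

    # Hypothetical one-off check of the updated XigmaNAS download URL.
    # SourceForge serves the file through a mirror redirect, so any
    # 200 response after redirects is a good sign.
    import urllib.request

    URL = ("https://sourceforge.net/projects/xigmanas/files/"
           "XigmaNAS-14.3.0.5/14.3.0.5.10566/"
           "XigmaNAS-x64-LiveCD-14.3.0.5.10566.iso/download")

    req = urllib.request.Request(URL, method="HEAD")
    with urllib.request.urlopen(req, timeout=15) as resp:
        print(resp.status, resp.headers.get("Content-Length"))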