Mirror of https://github.com/MacRimi/ProxMenux.git (synced 2025-11-18 03:26:17 +00:00)

Commit: Update AppImage
@@ -1,8 +1,11 @@
 "use client"
 
+import type React from "react"
+
 import { useState, useEffect } from "react"
 import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle } from "@/components/ui/dialog"
 import { Badge } from "@/components/ui/badge"
+import { Button } from "@/components/ui/button"
 import {
   Loader2,
   CheckCircle2,
@@ -19,6 +22,7 @@ import {
   FileText,
   RefreshCw,
   Shield,
+  X,
 } from "lucide-react"
 
 interface CategoryCheck {
@@ -148,17 +152,53 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
 
   const stats = getHealthStats()
 
+  const handleCategoryClick = (categoryKey: string, status: string) => {
+    if (status === "OK") return // Do not navigate when the category is OK
+
+    onOpenChange(false) // Close the modal
+
+    // Map categories to dashboard tabs
+    const categoryToTab: Record<string, string> = {
+      storage: "storage",
+      disks: "storage",
+      network: "network",
+      vms: "vms",
+      logs: "logs",
+      hardware: "hardware",
+      services: "hardware",
+    }
+
+    const targetTab = categoryToTab[categoryKey]
+    if (targetTab) {
+      // Dispatch an event so the dashboard switches tab
+      const event = new CustomEvent("changeTab", { detail: { tab: targetTab } })
+      window.dispatchEvent(event)
+    }
+  }
+
+  const handleAcknowledge = async (errorKey: string, e: React.MouseEvent) => {
+    e.stopPropagation() // Prevent navigation
+
+    try {
+      await fetch(getApiUrl(`/api/health/acknowledge/${errorKey}`), {
+        method: "POST",
+      })
+      // Refresh health data
+      await fetchHealthDetails()
+    } catch (err) {
+      console.error("[v0] Error acknowledging:", err)
+    }
+  }
+
   return (
     <Dialog open={open} onOpenChange={onOpenChange}>
       <DialogContent className="max-w-3xl max-h-[85vh] overflow-y-auto">
         <DialogHeader>
-          <DialogTitle className="flex items-center justify-between">
-            <div className="flex items-center gap-2">
-              <Activity className="h-6 w-6" />
-              System Health Status
-            </div>
-            {healthData && getStatusBadge(healthData.overall)}
+          <DialogTitle className="flex items-center gap-2">
+            <Activity className="h-6 w-6" />
+            System Health Status
           </DialogTitle>
+          <div className="mt-4">{healthData && getStatusBadge(healthData.overall)}</div>
           <DialogDescription>Detailed health checks for all system components</DialogDescription>
         </DialogHeader>
 
@@ -213,13 +253,14 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
             return (
               <div
                 key={key}
+                onClick={() => handleCategoryClick(key, status)}
                 className={`flex items-start gap-3 p-3 rounded-lg border transition-colors ${
                   status === "OK"
                     ? "bg-green-500/5 border-green-500/20 hover:bg-green-500/10"
                     : status === "WARNING"
-                      ? "bg-yellow-500/5 border-yellow-500/20 hover:bg-yellow-500/10"
+                      ? "bg-yellow-500/5 border-yellow-500/20 hover:bg-yellow-500/10 cursor-pointer"
                       : status === "CRITICAL"
-                        ? "bg-red-500/5 border-red-500/20 hover:bg-red-500/10"
+                        ? "bg-red-500/5 border-red-500/20 hover:bg-red-500/10 cursor-pointer"
                         : "bg-muted/30 hover:bg-muted/50"
                 }`}
               >
@@ -251,10 +292,25 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
                   {Object.entries(details).map(([detailKey, detailValue]: [string, any]) => {
                     if (typeof detailValue === "object" && detailValue !== null) {
                       return (
-                        <div key={detailKey} className="text-xs pl-3 border-l-2 border-muted">
-                          <span className="font-medium">{detailKey}:</span>
-                          {detailValue.reason && (
-                            <span className="ml-1 text-muted-foreground">{detailValue.reason}</span>
+                        <div
+                          key={detailKey}
+                          className="flex items-start justify-between gap-2 text-xs pl-3 border-l-2 border-muted"
+                        >
+                          <div>
+                            <span className="font-medium">{detailKey}:</span>
+                            {detailValue.reason && (
+                              <span className="ml-1 text-muted-foreground">{detailValue.reason}</span>
+                            )}
+                          </div>
+                          {status !== "OK" && (
+                            <Button
+                              size="sm"
+                              variant="ghost"
+                              className="h-5 px-1 hover:bg-red-500/10"
+                              onClick={(e) => handleAcknowledge(detailKey, e)}
+                            >
+                              <X className="h-3 w-3" />
+                            </Button>
                           )}
                         </div>
                       )

@@ -98,10 +98,19 @@ export function ProxmoxDashboard() {
       const uptimeValue =
         data.uptime && typeof data.uptime === "string" && data.uptime.trim() !== "" ? data.uptime : "N/A"
 
-      const healthStatus = data.health?.status || "healthy"
+      const backendStatus = data.health?.status?.toUpperCase() || "OK"
+      let healthStatus: "healthy" | "warning" | "critical"
+
+      if (backendStatus === "CRITICAL") {
+        healthStatus = "critical"
+      } else if (backendStatus === "WARNING") {
+        healthStatus = "warning"
+      } else {
+        healthStatus = "healthy"
+      }
 
       setSystemStatus({
-        status: healthStatus as "healthy" | "warning" | "critical",
+        status: healthStatus,
         uptime: uptimeValue,
         lastUpdate: new Date().toLocaleTimeString("en-US", { hour12: false }),
         serverName: data.hostname || "Unknown",
@@ -127,11 +136,13 @@ export function ProxmoxDashboard() {
     // Always do an initial fetch
     fetchSystemData()
 
+    // On the overview tab: every 30 seconds so the health status refreshes frequently
+    // On other tabs: every 60 seconds to reduce load
     let interval: ReturnType<typeof setInterval> | null = null
     if (activeTab === "overview") {
-      interval = setInterval(fetchSystemData, 9000) // changed from 10000 to 9000 ms
+      interval = setInterval(fetchSystemData, 30000) // 30 seconds
    } else {
-      interval = setInterval(fetchSystemData, 61000) // changed from 60000 to 61000 ms
+      interval = setInterval(fetchSystemData, 60000) // 60 seconds
    }
 
     return () => {
@@ -139,6 +150,20 @@ export function ProxmoxDashboard() {
     }
   }, [fetchSystemData, activeTab])
 
+  useEffect(() => {
+    const handleChangeTab = (event: CustomEvent) => {
+      const { tab } = event.detail
+      if (tab) {
+        setActiveTab(tab)
+      }
+    }
+
+    window.addEventListener("changeTab", handleChangeTab as EventListener)
+    return () => {
+      window.removeEventListener("changeTab", handleChangeTab as EventListener)
+    }
+  }, [])
+
   useEffect(() => {
     if (
       systemStatus.serverName &&

@@ -1,9 +1,10 @@
 """
-Flask routes for health monitoring
+Flask routes for health monitoring with persistence support
 """
 
-from flask import Blueprint, jsonify
+from flask import Blueprint, jsonify, request
 from health_monitor import health_monitor
+from health_persistence import health_persistence
 
 health_bp = Blueprint('health', __name__)
 
@@ -47,3 +48,22 @@ def get_system_info():
         return jsonify(info)
     except Exception as e:
         return jsonify({'error': str(e)}), 500
+
+@health_bp.route('/api/health/acknowledge/<error_key>', methods=['POST'])
+def acknowledge_error(error_key):
+    """Acknowledge an error manually (user dismissed it)"""
+    try:
+        health_persistence.acknowledge_error(error_key)
+        return jsonify({'success': True, 'message': 'Error acknowledged'})
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+@health_bp.route('/api/health/active-errors', methods=['GET'])
+def get_active_errors():
+    """Get all active persistent errors"""
+    try:
+        category = request.args.get('category')
+        errors = health_persistence.get_active_errors(category)
+        return jsonify({'errors': errors})
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
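
As a quick way to exercise the two new endpoints outside the React modal, a small script like the following works; this is only a sketch, and the base URL, port, and error key are placeholders rather than values taken from this commit.

import requests  # assumption: requests is available in the test environment

BASE_URL = "http://localhost:8008"  # placeholder: adjust to wherever the Flask app listens

# List active persistent errors, optionally filtered by category
resp = requests.get(f"{BASE_URL}/api/health/active-errors", params={"category": "vms"})
print(resp.json())  # {'errors': [...]}

# Acknowledge (dismiss) one of them by its error_key, e.g. 'vm_101'
resp = requests.post(f"{BASE_URL}/api/health/acknowledge/vm_101")
print(resp.json())  # {'success': True, 'message': 'Error acknowledged'}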

@@ -17,10 +17,12 @@ from datetime import datetime, timedelta
 from collections import defaultdict
 import re
 
+from health_persistence import health_persistence
+
 class HealthMonitor:
     """
     Monitors system health across multiple components with minimal impact.
-    Implements hysteresis, intelligent caching, and progressive escalation.
+    Implements hysteresis, intelligent caching, progressive escalation, and persistent error tracking.
     Always returns all 10 health categories.
     """
 
@@ -28,8 +30,8 @@ class HealthMonitor:
     CPU_WARNING = 85
     CPU_CRITICAL = 95
     CPU_RECOVERY = 75
-    CPU_WARNING_DURATION = 60
-    CPU_CRITICAL_DURATION = 120
+    CPU_WARNING_DURATION = 300  # 5 minutes sustained
+    CPU_CRITICAL_DURATION = 300  # 5 minutes sustained
     CPU_RECOVERY_DURATION = 120
 
     # Memory Thresholds
@@ -85,6 +87,11 @@ class HealthMonitor:
         self.io_error_history = defaultdict(list)
         self.failed_vm_history = set()  # Track VMs that failed to start
+
+        try:
+            health_persistence.cleanup_old_errors()
+        except Exception as e:
+            print(f"[HealthMonitor] Cleanup warning: {e}")
 
     def get_system_info(self) -> Dict[str, Any]:
         """
         Get lightweight system info for header display.
@@ -188,7 +195,11 @@ class HealthMonitor:
         """
         Get comprehensive health status with all checks.
         Returns JSON structure with ALL 10 categories always present.
+        Now includes persistent error tracking.
         """
+        active_errors = health_persistence.get_active_errors()
+        persistent_issues = {err['error_key']: err for err in active_errors}
+
         details = {
             'cpu': {'status': 'OK'},
             'memory': {'status': 'OK'},
@@ -231,8 +242,8 @@ class HealthMonitor:
         elif disks_status.get('status') == 'WARNING':
             warning_issues.append(disks_status.get('reason', 'Disk issue'))
 
-        # Priority 4: VMs/CTs - now detects qmp errors from logs
-        vms_status = self._check_vms_cts_optimized()
+        # Priority 4: VMs/CTs - now with persistence
+        vms_status = self._check_vms_cts_with_persistence()
         if vms_status:
             details['vms'] = vms_status
             if vms_status.get('status') == 'CRITICAL':
@@ -265,8 +276,8 @@ class HealthMonitor:
         elif memory_status.get('status') == 'WARNING':
             warning_issues.append(memory_status.get('reason', 'Memory high'))
 
-        # Priority 8: Logs
-        logs_status = self._check_logs_lightweight()
+        # Priority 8: Logs - now with persistence
+        logs_status = self._check_logs_with_persistence()
         details['logs'] = logs_status
         if logs_status.get('status') == 'CRITICAL':
             critical_issues.append(logs_status.get('reason', 'Critical log errors'))
@@ -305,7 +316,7 @@ class HealthMonitor:
         }
 
     def _check_cpu_with_hysteresis(self) -> Dict[str, Any]:
-        """Check CPU with hysteresis to avoid flapping alerts"""
+        """Check CPU with hysteresis to avoid flapping alerts - requires 5min sustained high usage"""
         try:
             cpu_percent = psutil.cpu_percent(interval=1)
             current_time = time.time()
@@ -318,33 +329,33 @@ class HealthMonitor:
 
             self.state_history[state_key] = [
                 entry for entry in self.state_history[state_key]
-                if current_time - entry['time'] < 300
+                if current_time - entry['time'] < 360
             ]
 
-            critical_duration = sum(
-                1 for entry in self.state_history[state_key]
+            critical_samples = [
+                entry for entry in self.state_history[state_key]
                 if entry['value'] >= self.CPU_CRITICAL and
                 current_time - entry['time'] <= self.CPU_CRITICAL_DURATION
-            )
+            ]
 
-            warning_duration = sum(
-                1 for entry in self.state_history[state_key]
+            warning_samples = [
+                entry for entry in self.state_history[state_key]
                 if entry['value'] >= self.CPU_WARNING and
                 current_time - entry['time'] <= self.CPU_WARNING_DURATION
-            )
+            ]
 
-            recovery_duration = sum(
-                1 for entry in self.state_history[state_key]
+            recovery_samples = [
+                entry for entry in self.state_history[state_key]
                 if entry['value'] < self.CPU_RECOVERY and
                 current_time - entry['time'] <= self.CPU_RECOVERY_DURATION
-            )
+            ]
 
-            if critical_duration >= 2:
+            if len(critical_samples) >= 3:
                 status = 'CRITICAL'
-                reason = f'CPU >{self.CPU_CRITICAL}% for {self.CPU_CRITICAL_DURATION}s'
-            elif warning_duration >= 2 and recovery_duration < 2:
+                reason = f'CPU >{self.CPU_CRITICAL}% sustained for {self.CPU_CRITICAL_DURATION}s'
+            elif len(warning_samples) >= 3 and len(recovery_samples) < 2:
                 status = 'WARNING'
-                reason = f'CPU >{self.CPU_WARNING}% for {self.CPU_WARNING_DURATION}s'
+                reason = f'CPU >{self.CPU_WARNING}% sustained for {self.CPU_WARNING_DURATION}s'
             else:
                 status = 'OK'
                 reason = None
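
The point of the rewritten hysteresis check is that a single spike no longer raises an alert: at least three samples above the threshold must fall inside the duration window. A stripped-down, standalone sketch of that rule, with invented sample data, looks like this:

import time

CPU_CRITICAL = 95
CPU_CRITICAL_DURATION = 300  # seconds, mirrors the new constant above

def is_sustained_critical(history, now):
    """True only when >= 3 samples exceeded CPU_CRITICAL inside the window."""
    recent = [
        (t, v) for t, v in history
        if v >= CPU_CRITICAL and now - t <= CPU_CRITICAL_DURATION
    ]
    return len(recent) >= 3

now = time.time()
spike = [(now - 10, 99)]  # one spike: not sustained
sustained = [(now - 240, 97), (now - 120, 98), (now - 30, 99)]
print(is_sustained_critical(spike, now))      # False
print(is_sustained_critical(sustained, now))  # True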
@@ -871,15 +882,15 @@ class HealthMonitor:
 
     def _check_vms_cts_optimized(self) -> Dict[str, Any]:
         """
-        Optimized VM/CT check - detects qmp failures and other VM errors.
-        Now parses logs for VM/CT specific errors like qmp command failures.
+        Optimized VM/CT check - detects qmp failures and startup errors from logs.
+        Improved detection of container and VM errors from journalctl.
         """
         try:
             issues = []
             vm_details = {}
 
             result = subprocess.run(
-                ['journalctl', '--since', '10 minutes ago', '--no-pager', '-u', 'pve*', '-p', 'warning'],
+                ['journalctl', '--since', '10 minutes ago', '--no-pager', '-p', 'warning'],
                 capture_output=True,
                 text=True,
                 timeout=3
@@ -903,22 +914,56 @@ class HealthMonitor:
                         }
                         continue
 
-                    ct_match = re.search(r'(?:ct|container)\s+(\d+)', line_lower)
-                    if ct_match and ('error' in line_lower or 'fail' in line_lower):
-                        ctid = ct_match.group(1)
+                    ct_error_match = re.search(r'(?:ct|container|lxc)\s+(\d+)', line_lower)
+                    if ct_error_match and ('error' in line_lower or 'fail' in line_lower or 'device' in line_lower):
+                        ctid = ct_error_match.group(1)
                         key = f'ct_{ctid}'
                         if key not in vm_details:
-                            issues.append(f'CT {ctid}: Error detected')
+                            if 'device' in line_lower and 'does not exist' in line_lower:
+                                device_match = re.search(r'device\s+([/\w\d]+)\s+does not exist', line_lower)
+                                if device_match:
+                                    reason = f'Device {device_match.group(1)} missing'
+                                else:
+                                    reason = 'Device error'
+                            elif 'failed to start' in line_lower:
+                                reason = 'Failed to start'
+                            else:
+                                reason = 'Container error'
+
+                            issues.append(f'CT {ctid}: {reason}')
+                            vm_details[key] = {
+                                'status': 'WARNING' if 'device' in reason.lower() else 'CRITICAL',
+                                'reason': reason,
+                                'id': ctid,
+                                'type': 'CT'
+                            }
+                        continue
+
+                    vzstart_match = re.search(r'vzstart:(\d+):', line)
+                    if vzstart_match and ('error' in line_lower or 'fail' in line_lower or 'does not exist' in line_lower):
+                        ctid = vzstart_match.group(1)
+                        key = f'ct_{ctid}'
+                        if key not in vm_details:
+                            # Extract the error message
+                            if 'device' in line_lower and 'does not exist' in line_lower:
+                                device_match = re.search(r'device\s+([/\w\d]+)\s+does not exist', line_lower)
+                                if device_match:
+                                    reason = f'Device {device_match.group(1)} missing'
+                                else:
+                                    reason = 'Device error'
+                            else:
+                                reason = 'Startup error'
+
+                            issues.append(f'CT {ctid}: {reason}')
                             vm_details[key] = {
                                 'status': 'WARNING',
-                                'reason': 'Container error',
+                                'reason': reason,
                                 'id': ctid,
                                 'type': 'CT'
                             }
                         continue
 
                     if any(keyword in line_lower for keyword in ['failed to start', 'cannot start', 'activation failed', 'start error']):
-                        # Extract VM/CT ID
                         id_match = re.search(r'\b(\d{3,4})\b', line)
                         if id_match:
                             vmid = id_match.group(1)
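
Because the detection is plain regex over journalctl output, the patterns can be dry-run against captured lines; the two sample lines below are invented for illustration and are not taken from the commit.

import re

samples = [
    "Nov 05 10:01:02 pve vzstart:105: Device /dev/dri/renderD128 does not exist",  # invented
    "Nov 05 10:02:10 pve lxc 203 failed to start",                                 # invented
]

for line in samples:
    line_lower = line.lower()
    vz = re.search(r'vzstart:(\d+):', line)
    ct = re.search(r'(?:ct|container|lxc)\s+(\d+)', line_lower)
    dev = re.search(r'device\s+([/\w\d]+)\s+does not exist', line_lower)
    ctid = (vz or ct).group(1) if (vz or ct) else None
    reason = f"Device {dev.group(1)} missing" if dev else "Container error"
    print(ctid, reason)  # -> "105 Device /dev/dri/renderd128 missing", then "203 Container error"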
@@ -946,6 +991,118 @@ class HealthMonitor:
         except Exception:
             return {'status': 'OK'}
 
+    # Modified to use persistence
+    def _check_vms_cts_with_persistence(self) -> Dict[str, Any]:
+        """
+        Check VMs/CTs with persistent error tracking.
+        Errors persist until VM starts or 48h elapsed.
+        """
+        try:
+            issues = []
+            vm_details = {}
+
+            # Get persistent errors first
+            persistent_errors = health_persistence.get_active_errors('vms')
+
+            # Check if any persistent VMs/CTs have started
+            for error in persistent_errors:
+                error_key = error['error_key']
+                if error_key.startswith('vm_') or error_key.startswith('ct_'):
+                    vm_id = error_key.split('_')[1]
+                    if health_persistence.check_vm_running(vm_id):
+                        continue  # Error auto-resolved
+
+                # Still active
+                vm_details[error_key] = {
+                    'status': error['severity'],
+                    'reason': error['reason'],
+                    'id': error.get('details', {}).get('id', 'unknown'),
+                    'type': error.get('details', {}).get('type', 'VM/CT'),
+                    'first_seen': error['first_seen']
+                }
+                issues.append(f"{error.get('details', {}).get('type', 'VM')} {error.get('details', {}).get('id', '')}: {error['reason']}")
+
+            # Check for new errors in logs
+            result = subprocess.run(
+                ['journalctl', '--since', '10 minutes ago', '--no-pager', '-p', 'warning'],
+                capture_output=True,
+                text=True,
+                timeout=3
+            )
+
+            if result.returncode == 0:
+                for line in result.stdout.split('\n'):
+                    line_lower = line.lower()
+
+                    # VM QMP errors
+                    vm_qmp_match = re.search(r'vm\s+(\d+)\s+qmp\s+command.*(?:failed|unable|timeout)', line_lower)
+                    if vm_qmp_match:
+                        vmid = vm_qmp_match.group(1)
+                        error_key = f'vm_{vmid}'
+                        if error_key not in vm_details:
+                            # Record persistent error
+                            health_persistence.record_error(
+                                error_key=error_key,
+                                category='vms',
+                                severity='WARNING',
+                                reason='QMP command timeout',
+                                details={'id': vmid, 'type': 'VM'}
+                            )
+                            issues.append(f'VM {vmid}: Communication issue')
+                            vm_details[error_key] = {
+                                'status': 'WARNING',
+                                'reason': 'QMP command timeout',
+                                'id': vmid,
+                                'type': 'VM'
+                            }
+                        continue
+
+                    # Container errors
+                    vzstart_match = re.search(r'vzstart:(\d+):', line)
+                    if vzstart_match and ('error' in line_lower or 'fail' in line_lower or 'does not exist' in line_lower):
+                        ctid = vzstart_match.group(1)
+                        error_key = f'ct_{ctid}'
+
+                        if error_key not in vm_details:
+                            if 'device' in line_lower and 'does not exist' in line_lower:
+                                device_match = re.search(r'device\s+([/\w\d]+)\s+does not exist', line_lower)
+                                if device_match:
+                                    reason = f'Device {device_match.group(1)} missing'
+                                else:
+                                    reason = 'Device error'
+                            else:
+                                reason = 'Startup error'
+
+                            # Record persistent error
+                            health_persistence.record_error(
+                                error_key=error_key,
+                                category='vms',
+                                severity='WARNING',
+                                reason=reason,
+                                details={'id': ctid, 'type': 'CT'}
+                            )
+                            issues.append(f'CT {ctid}: {reason}')
+                            vm_details[error_key] = {
+                                'status': 'WARNING',
+                                'reason': reason,
+                                'id': ctid,
+                                'type': 'CT'
+                            }
+
+            if not issues:
+                return {'status': 'OK'}
+
+            has_critical = any(d.get('status') == 'CRITICAL' for d in vm_details.values())
+
+            return {
+                'status': 'CRITICAL' if has_critical else 'WARNING',
+                'reason': '; '.join(issues[:3]),
+                'details': vm_details
+            }
+
+        except Exception:
+            return {'status': 'OK'}
+
     def _check_pve_services(self) -> Dict[str, Any]:
         """Check critical Proxmox services"""
         try:
@@ -980,13 +1137,24 @@ class HealthMonitor:
                 'reason': f'Service check failed: {str(e)}'
             }
 
-    def _check_logs_lightweight(self) -> Dict[str, Any]:
-        """Lightweight log analysis (cached, checked every 5 minutes)"""
+    # Modified to use persistence
+    def _check_logs_with_persistence(self) -> Dict[str, Any]:
+        """
+        Check logs with persistent error tracking.
+        Critical log errors persist for 24h unless acknowledged.
+        """
         cache_key = 'logs_analysis'
         current_time = time.time()
 
         if cache_key in self.last_check_times:
             if current_time - self.last_check_times[cache_key] < self.LOG_CHECK_INTERVAL:
+                # Return persistent errors if any
+                persistent_errors = health_persistence.get_active_errors('logs')
+                if persistent_errors:
+                    return {
+                        'status': 'WARNING',
+                        'reason': f'{len(persistent_errors)} persistent log issues'
+                    }
                 return self.cached_results.get(cache_key, {'status': 'OK'})
 
         try:
@@ -1011,6 +1179,16 @@ class HealthMonitor:
                     if keyword.lower() in line_lower:
                         critical_keywords_found.append(keyword)
                         errors_5m += 1
+
+                        # Record persistent error for critical keywords
+                        error_key = f'log_critical_{keyword.replace(" ", "_")}'
+                        health_persistence.record_error(
+                            error_key=error_key,
+                            category='logs',
+                            severity='CRITICAL',
+                            reason=f'Critical log: {keyword}',
+                            details={'keyword': keyword}
+                        )
                         break
                 else:
                     if 'error' in line_lower or 'critical' in line_lower or 'fatal' in line_lower:

AppImage/scripts/health_persistence.py (new file, 326 lines)
@@ -0,0 +1,326 @@
+"""
+Health Monitor Persistence Module
+Manages persistent error tracking across AppImage updates using SQLite.
+Stores errors in /root/.config/proxmenux-monitor/health_monitor.db
+
+Features:
+- Persistent error storage (survives AppImage updates)
+- Smart error resolution (auto-clear when VM starts, or after 48h)
+- Event system for future Telegram notifications
+- Manual acknowledgment support
+
+Author: MacRimi
+Version: 1.0
+"""
+
+import sqlite3
+import json
+import os
+from datetime import datetime, timedelta
+from typing import Dict, List, Any, Optional
+from pathlib import Path
+
+class HealthPersistence:
+    """Manages persistent health error tracking"""
+
+    # Error retention periods (seconds)
+    VM_ERROR_RETENTION = 48 * 3600  # 48 hours
+    LOG_ERROR_RETENTION = 24 * 3600  # 24 hours
+    DISK_ERROR_RETENTION = 48 * 3600  # 48 hours
+
+    def __init__(self):
+        """Initialize persistence with database in config directory"""
+        self.data_dir = Path('/root/.config/proxmenux-monitor')
+        self.data_dir.mkdir(parents=True, exist_ok=True)
+
+        self.db_path = self.data_dir / 'health_monitor.db'
+        self._init_database()
+
+    def _init_database(self):
+        """Initialize SQLite database with required tables"""
+        conn = sqlite3.connect(str(self.db_path))
+        cursor = conn.cursor()
+
+        # Errors table
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS errors (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                error_key TEXT UNIQUE NOT NULL,
+                category TEXT NOT NULL,
+                severity TEXT NOT NULL,
+                reason TEXT NOT NULL,
+                details TEXT,
+                first_seen TEXT NOT NULL,
+                last_seen TEXT NOT NULL,
+                resolved_at TEXT,
+                acknowledged INTEGER DEFAULT 0,
+                notification_sent INTEGER DEFAULT 0
+            )
+        ''')
+
+        # Events table (for future Telegram notifications)
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS events (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                event_type TEXT NOT NULL,
+                error_key TEXT NOT NULL,
+                timestamp TEXT NOT NULL,
+                data TEXT
+            )
+        ''')
+
+        # Indexes for performance
+        cursor.execute('CREATE INDEX IF NOT EXISTS idx_error_key ON errors(error_key)')
+        cursor.execute('CREATE INDEX IF NOT EXISTS idx_category ON errors(category)')
+        cursor.execute('CREATE INDEX IF NOT EXISTS idx_resolved ON errors(resolved_at)')
+        cursor.execute('CREATE INDEX IF NOT EXISTS idx_events_error ON events(error_key)')
+
+        conn.commit()
+        conn.close()
+
+    def record_error(self, error_key: str, category: str, severity: str,
+                     reason: str, details: Optional[Dict] = None) -> Dict[str, Any]:
+        """
+        Record or update an error.
+        Returns event info (new_error, updated, etc.)
+        """
+        conn = sqlite3.connect(str(self.db_path))
+        cursor = conn.cursor()
+
+        now = datetime.now().isoformat()
+        details_json = json.dumps(details) if details else None
+
+        # Check if error exists
+        cursor.execute('SELECT id, first_seen, notification_sent FROM errors WHERE error_key = ?',
+                       (error_key,))
+        existing = cursor.fetchone()
+
+        event_info = {'type': 'updated', 'needs_notification': False}
+
+        if existing:
+            # Update existing error
+            cursor.execute('''
+                UPDATE errors
+                SET last_seen = ?, severity = ?, reason = ?, details = ?, resolved_at = NULL
+                WHERE error_key = ?
+            ''', (now, severity, reason, details_json, error_key))
+
+            # Check if severity escalated
+            cursor.execute('SELECT severity FROM errors WHERE error_key = ?', (error_key,))
+            old_severity = cursor.fetchone()[0]
+            if old_severity == 'WARNING' and severity == 'CRITICAL':
+                event_info['type'] = 'escalated'
+                event_info['needs_notification'] = True
+        else:
+            # Insert new error
+            cursor.execute('''
+                INSERT INTO errors
+                (error_key, category, severity, reason, details, first_seen, last_seen)
+                VALUES (?, ?, ?, ?, ?, ?, ?)
+            ''', (error_key, category, severity, reason, details_json, now, now))
+
+            event_info['type'] = 'new'
+            event_info['needs_notification'] = True
+
+        # Record event
+        self._record_event(cursor, event_info['type'], error_key,
+                           {'severity': severity, 'reason': reason})
+
+        conn.commit()
+        conn.close()
+
+        return event_info
+
+    def resolve_error(self, error_key: str, reason: str = 'auto-resolved'):
+        """Mark an error as resolved"""
+        conn = sqlite3.connect(str(self.db_path))
+        cursor = conn.cursor()
+
+        now = datetime.now().isoformat()
+
+        cursor.execute('''
+            UPDATE errors
+            SET resolved_at = ?
+            WHERE error_key = ? AND resolved_at IS NULL
+        ''', (now, error_key))
+
+        if cursor.rowcount > 0:
+            self._record_event(cursor, 'resolved', error_key, {'reason': reason})
+
+        conn.commit()
+        conn.close()
+
+    def acknowledge_error(self, error_key: str):
+        """Manually acknowledge an error (won't notify again)"""
+        conn = sqlite3.connect(str(self.db_path))
+        cursor = conn.cursor()
+
+        cursor.execute('''
+            UPDATE errors
+            SET acknowledged = 1
+            WHERE error_key = ?
+        ''', (error_key,))
+
+        self._record_event(cursor, 'acknowledged', error_key, {})
+
+        conn.commit()
+        conn.close()
+
+    def get_active_errors(self, category: Optional[str] = None) -> List[Dict[str, Any]]:
+        """Get all active (unresolved) errors, optionally filtered by category"""
+        conn = sqlite3.connect(str(self.db_path))
+        conn.row_factory = sqlite3.Row
+        cursor = conn.cursor()
+
+        if category:
+            cursor.execute('''
+                SELECT * FROM errors
+                WHERE resolved_at IS NULL AND category = ?
+                ORDER BY severity DESC, last_seen DESC
+            ''', (category,))
+        else:
+            cursor.execute('''
+                SELECT * FROM errors
+                WHERE resolved_at IS NULL
+                ORDER BY severity DESC, last_seen DESC
+            ''')
+
+        rows = cursor.fetchall()
+        conn.close()
+
+        errors = []
+        for row in rows:
+            error_dict = dict(row)
+            if error_dict.get('details'):
+                error_dict['details'] = json.loads(error_dict['details'])
+            errors.append(error_dict)
+
+        return errors
+
+    def cleanup_old_errors(self):
+        """Clean up old resolved errors and auto-resolve stale errors"""
+        conn = sqlite3.connect(str(self.db_path))
+        cursor = conn.cursor()
+
+        now = datetime.now()
+
+        # Delete resolved errors older than 7 days
+        cutoff_resolved = (now - timedelta(days=7)).isoformat()
+        cursor.execute('DELETE FROM errors WHERE resolved_at < ?', (cutoff_resolved,))
+
+        # Auto-resolve VM/CT errors older than 48h
+        cutoff_vm = (now - timedelta(seconds=self.VM_ERROR_RETENTION)).isoformat()
+        cursor.execute('''
+            UPDATE errors
+            SET resolved_at = ?
+            WHERE category = 'vms'
+            AND resolved_at IS NULL
+            AND first_seen < ?
+            AND acknowledged = 0
+        ''', (now.isoformat(), cutoff_vm))
+
+        # Auto-resolve log errors older than 24h
+        cutoff_logs = (now - timedelta(seconds=self.LOG_ERROR_RETENTION)).isoformat()
+        cursor.execute('''
+            UPDATE errors
+            SET resolved_at = ?
+            WHERE category = 'logs'
+            AND resolved_at IS NULL
+            AND first_seen < ?
+            AND acknowledged = 0
+        ''', (now.isoformat(), cutoff_logs))
+
+        # Delete old events (>30 days)
+        cutoff_events = (now - timedelta(days=30)).isoformat()
+        cursor.execute('DELETE FROM events WHERE timestamp < ?', (cutoff_events,))
+
+        conn.commit()
+        conn.close()
+
+    def check_vm_running(self, vm_id: str) -> bool:
+        """
+        Check if a VM/CT is running and resolve error if so.
+        Returns True if running and error was resolved.
+        """
+        import subprocess
+
+        try:
+            # Check qm status for VMs
+            result = subprocess.run(
+                ['qm', 'status', vm_id],
+                capture_output=True,
+                text=True,
+                timeout=2
+            )
+
+            if result.returncode == 0 and 'running' in result.stdout.lower():
+                self.resolve_error(f'vm_{vm_id}', 'VM started')
+                return True
+
+            # Check pct status for containers
+            result = subprocess.run(
+                ['pct', 'status', vm_id],
+                capture_output=True,
+                text=True,
+                timeout=2
+            )
+
+            if result.returncode == 0 and 'running' in result.stdout.lower():
+                self.resolve_error(f'ct_{vm_id}', 'Container started')
+                return True
+
+            return False
+
+        except Exception:
+            return False
+
+    def _record_event(self, cursor, event_type: str, error_key: str, data: Dict):
+        """Internal: Record an event"""
+        cursor.execute('''
+            INSERT INTO events (event_type, error_key, timestamp, data)
+            VALUES (?, ?, ?, ?)
+        ''', (event_type, error_key, datetime.now().isoformat(), json.dumps(data)))
+
+    def get_unnotified_errors(self) -> List[Dict[str, Any]]:
+        """Get errors that need Telegram notification"""
+        conn = sqlite3.connect(str(self.db_path))
+        conn.row_factory = sqlite3.Row
+        cursor = conn.cursor()
+
+        cursor.execute('''
+            SELECT * FROM errors
+            WHERE notification_sent = 0
+            AND resolved_at IS NULL
+            AND acknowledged = 0
+            ORDER BY severity DESC, first_seen ASC
+        ''')
+
+        rows = cursor.fetchall()
+        conn.close()
+
+        errors = []
+        for row in rows:
+            error_dict = dict(row)
+            if error_dict.get('details'):
+                error_dict['details'] = json.loads(error_dict['details'])
+            errors.append(error_dict)
+
+        return errors
+
+    def mark_notified(self, error_key: str):
+        """Mark error as notified"""
+        conn = sqlite3.connect(str(self.db_path))
+        cursor = conn.cursor()
+
+        cursor.execute('''
+            UPDATE errors
+            SET notification_sent = 1
+            WHERE error_key = ?
+        ''', (error_key,))
+
+        conn.commit()
+        conn.close()
+
+
+# Global instance
+health_persistence = HealthPersistence()
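
A minimal usage sketch of the new module, assuming it runs on the Proxmox host itself: importing it creates the SQLite database under /root/.config/proxmenux-monitor, so this is not something to run on a development machine. The container ID and reason below are illustrative only.

from health_persistence import health_persistence

# Record (or refresh) a warning for container 105
event = health_persistence.record_error(
    error_key='ct_105',
    category='vms',
    severity='WARNING',
    reason='Device /dev/dri/renderD128 missing',  # illustrative
    details={'id': '105', 'type': 'CT'},
)
print(event)  # {'type': 'new', 'needs_notification': True} on first insert

# Read it back, then dismiss and resolve it manually
print(health_persistence.get_active_errors('vms'))
health_persistence.acknowledge_error('ct_105')
health_persistence.resolve_error('ct_105', 'cleared by operator')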