mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-30 03:16:34 +00:00
Update health_persistence.py
This commit is contained in:
@@ -1027,23 +1027,67 @@ class HealthPersistence:
|
|||||||
OR reason LIKE '%killed%process%')
|
OR reason LIKE '%killed%process%')
|
||||||
''', (now_iso, stale_mem_cutoff))
|
''', (now_iso, stale_mem_cutoff))
|
||||||
|
|
||||||
# ── 4. VMS category: Auto-resolve if VM/CT is now running ──
|
# ── 4. VMS category: Auto-resolve if VM/CT is now running or deleted ──
|
||||||
# Check all active VM/CT errors and resolve if the VM/CT is now running
|
# Check all active VM/CT errors and resolve if the VM/CT is now running
|
||||||
|
# NOTE: We do this inline to avoid deadlock (check_vm_running uses _db_lock)
|
||||||
cursor.execute('''
|
cursor.execute('''
|
||||||
SELECT error_key, category FROM errors
|
SELECT error_key, category, reason FROM errors
|
||||||
WHERE (category IN ('vms', 'vmct') OR error_key LIKE 'vm_%' OR error_key LIKE 'ct_%' OR error_key LIKE 'vmct_%')
|
WHERE (category IN ('vms', 'vmct') OR error_key LIKE 'vm_%' OR error_key LIKE 'ct_%' OR error_key LIKE 'vmct_%')
|
||||||
AND resolved_at IS NULL
|
AND resolved_at IS NULL
|
||||||
AND acknowledged = 0
|
AND acknowledged = 0
|
||||||
''')
|
''')
|
||||||
vm_errors = cursor.fetchall()
|
vm_errors = cursor.fetchall()
|
||||||
for error_key, cat in vm_errors:
|
for error_key, cat, reason in vm_errors:
|
||||||
# Extract VM/CT ID from error_key
|
# Extract VM/CT ID from error_key
|
||||||
import re
|
|
||||||
vmid_match = re.search(r'(?:vm_|ct_|vmct_)(\d+)', error_key)
|
vmid_match = re.search(r'(?:vm_|ct_|vmct_)(\d+)', error_key)
|
||||||
if vmid_match:
|
if vmid_match:
|
||||||
vmid = vmid_match.group(1)
|
vmid = vmid_match.group(1)
|
||||||
# Check if running - this auto-resolves if so
|
try:
|
||||||
self.check_vm_running(vmid)
|
# Check if VM/CT exists and is running
|
||||||
|
vm_running = False
|
||||||
|
ct_running = False
|
||||||
|
vm_exists = False
|
||||||
|
ct_exists = False
|
||||||
|
|
||||||
|
# Check VM
|
||||||
|
result_vm = subprocess.run(
|
||||||
|
['qm', 'status', vmid],
|
||||||
|
capture_output=True, text=True, timeout=2
|
||||||
|
)
|
||||||
|
if result_vm.returncode == 0:
|
||||||
|
vm_exists = True
|
||||||
|
vm_running = 'running' in result_vm.stdout.lower()
|
||||||
|
|
||||||
|
# Check CT
|
||||||
|
if not vm_exists:
|
||||||
|
result_ct = subprocess.run(
|
||||||
|
['pct', 'status', vmid],
|
||||||
|
capture_output=True, text=True, timeout=2
|
||||||
|
)
|
||||||
|
if result_ct.returncode == 0:
|
||||||
|
ct_exists = True
|
||||||
|
ct_running = 'running' in result_ct.stdout.lower()
|
||||||
|
|
||||||
|
# Resolve if deleted
|
||||||
|
if not vm_exists and not ct_exists:
|
||||||
|
cursor.execute('''
|
||||||
|
UPDATE errors SET resolved_at = ?
|
||||||
|
WHERE error_key = ? AND resolved_at IS NULL
|
||||||
|
''', (now_iso, error_key))
|
||||||
|
# Resolve transient errors if running (not persistent config errors)
|
||||||
|
elif vm_running or ct_running:
|
||||||
|
reason_lower = (reason or '').lower()
|
||||||
|
is_persistent = any(x in reason_lower for x in [
|
||||||
|
'device', 'missing', 'does not exist', 'permission',
|
||||||
|
'not found', 'no such', 'invalid'
|
||||||
|
])
|
||||||
|
if not is_persistent:
|
||||||
|
cursor.execute('''
|
||||||
|
UPDATE errors SET resolved_at = ?
|
||||||
|
WHERE error_key = ? AND resolved_at IS NULL
|
||||||
|
''', (now_iso, error_key))
|
||||||
|
except Exception:
|
||||||
|
pass # Skip this VM/CT if check fails
|
||||||
|
|
||||||
# ── 5. GENERIC: Any error not seen in 30 minutes while system is healthy ──
|
# ── 5. GENERIC: Any error not seen in 30 minutes while system is healthy ──
|
||||||
# If CPU < 80% and Memory < 85% and error hasn't been seen in 30 min,
|
# If CPU < 80% and Memory < 85% and error hasn't been seen in 30 min,
|
||||||
|
|||||||
Reference in New Issue
Block a user