mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-25 08:56:21 +00:00
Update notification service
This commit is contained in:
@@ -1093,7 +1093,7 @@ class HealthPersistence:
|
|||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
# ─── System Capabilities Cache ───────────────────────────────
|
# ─── System Capabilities Cache ───────────────────────────────
|
||||||
|
|
||||||
def get_capability(self, cap_key: str) -> Optional[str]:
|
def get_capability(self, cap_key: str) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -2144,60 +2144,98 @@ class PollingCollector:
|
|||||||
self._first_poll_done = True
|
self._first_poll_done = True
|
||||||
|
|
||||||
def _check_startup_aggregation(self):
|
def _check_startup_aggregation(self):
|
||||||
"""Check if startup period ended and emit aggregated VM/CT start message.
|
"""Check if startup period ended and emit comprehensive startup report.
|
||||||
|
|
||||||
During the startup grace period, TaskWatcher collects VM/CT starts instead
|
At the end of the health grace period, collects:
|
||||||
of emitting individual notifications. Once the period ends, this method
|
- VMs/CTs that started successfully
|
||||||
emits a single aggregated "System startup" notification.
|
- VMs/CTs that failed to start
|
||||||
|
- Service status
|
||||||
|
- Storage status
|
||||||
|
- Journal errors (for AI enrichment)
|
||||||
|
|
||||||
|
Emits a single "system_startup" notification with full report data.
|
||||||
"""
|
"""
|
||||||
# Only check once startup period is over
|
# Wait until health grace period is over (5 min) for complete picture
|
||||||
if _shared_state.is_startup_period():
|
if startup_grace.is_startup_health_grace():
|
||||||
return
|
return
|
||||||
|
|
||||||
# Only emit once
|
# Only emit once
|
||||||
if _shared_state.was_startup_aggregated():
|
if startup_grace.was_startup_aggregated():
|
||||||
return
|
return
|
||||||
|
|
||||||
# Get all collected startup VMs/CTs
|
# Collect comprehensive startup report
|
||||||
startup_items = _shared_state.get_and_clear_startup_vms()
|
report = startup_grace.collect_startup_report()
|
||||||
if not startup_items:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Count VMs and CTs
|
# Generate human-readable summary
|
||||||
vms = [(vmid, name) for vmid, name, vtype in startup_items if vtype == 'vm']
|
summary = startup_grace.format_startup_summary(report)
|
||||||
cts = [(vmid, name) for vmid, name, vtype in startup_items if vtype == 'ct']
|
|
||||||
|
|
||||||
vm_count = len(vms)
|
# Count totals
|
||||||
ct_count = len(cts)
|
vms_ok = len(report.get('vms_started', []))
|
||||||
total = vm_count + ct_count
|
cts_ok = len(report.get('cts_started', []))
|
||||||
|
vms_fail = len(report.get('vms_failed', []))
|
||||||
|
cts_fail = len(report.get('cts_failed', []))
|
||||||
|
total_ok = vms_ok + cts_ok
|
||||||
|
total_fail = vms_fail + cts_fail
|
||||||
|
|
||||||
# Build entity list (max 10 items for readability)
|
# Build entity list for backwards compatibility
|
||||||
entity_names = []
|
entity_names = []
|
||||||
for vmid, name in (vms + cts)[:10]:
|
for vm in report.get('vms_started', [])[:5]:
|
||||||
entity_names.append(f'{name} ({vmid})')
|
entity_names.append(f"{vm['name']} ({vm['vmid']})")
|
||||||
if total > 10:
|
for ct in report.get('cts_started', [])[:5]:
|
||||||
entity_names.append(f'...and {total - 10} more')
|
entity_names.append(f"{ct['name']} ({ct['vmid']})")
|
||||||
|
if total_ok > 10:
|
||||||
|
entity_names.append(f"...and {total_ok - 10} more")
|
||||||
|
|
||||||
# Build summary text
|
# Determine severity based on issues
|
||||||
parts = []
|
has_issues = (
|
||||||
if vm_count:
|
total_fail > 0 or
|
||||||
parts.append(f'{vm_count} VM{"s" if vm_count != 1 else ""}')
|
not report.get('services_ok', True) or
|
||||||
if ct_count:
|
not report.get('storage_ok', True) or
|
||||||
parts.append(f'{ct_count} CT{"s" if ct_count != 1 else ""}')
|
report.get('health_status') in ['CRITICAL', 'WARNING']
|
||||||
summary = ' and '.join(parts) + ' started'
|
)
|
||||||
|
severity = 'WARNING' if has_issues else 'INFO'
|
||||||
|
|
||||||
|
# Build notification data
|
||||||
data = {
|
data = {
|
||||||
'hostname': self._hostname,
|
'hostname': self._hostname,
|
||||||
'summary': summary,
|
'summary': summary,
|
||||||
'vm_count': vm_count,
|
|
||||||
'ct_count': ct_count,
|
# VM/CT counts (backwards compatible)
|
||||||
'total_count': total,
|
'vm_count': vms_ok,
|
||||||
|
'ct_count': cts_ok,
|
||||||
|
'total_count': total_ok,
|
||||||
'entity_list': ', '.join(entity_names),
|
'entity_list': ', '.join(entity_names),
|
||||||
'reason': f'System startup completed: {summary}',
|
|
||||||
|
# New: failure counts
|
||||||
|
'vms_failed_count': vms_fail,
|
||||||
|
'cts_failed_count': cts_fail,
|
||||||
|
'total_failed': total_fail,
|
||||||
|
|
||||||
|
# New: detailed lists
|
||||||
|
'vms_started': report.get('vms_started', []),
|
||||||
|
'cts_started': report.get('cts_started', []),
|
||||||
|
'vms_failed': report.get('vms_failed', []),
|
||||||
|
'cts_failed': report.get('cts_failed', []),
|
||||||
|
|
||||||
|
# New: system status
|
||||||
|
'services_ok': report.get('services_ok', True),
|
||||||
|
'services_failed': report.get('services_failed', []),
|
||||||
|
'storage_ok': report.get('storage_ok', True),
|
||||||
|
'storage_unavailable': report.get('storage_unavailable', []),
|
||||||
|
'health_status': report.get('health_status', 'UNKNOWN'),
|
||||||
|
'health_issues': report.get('health_issues', []),
|
||||||
|
|
||||||
|
# For AI enrichment
|
||||||
|
'_journal_context': report.get('_journal_context', ''),
|
||||||
|
|
||||||
|
# Metadata
|
||||||
|
'startup_duration_seconds': report.get('startup_duration_seconds', 0),
|
||||||
|
'has_issues': has_issues,
|
||||||
|
'reason': summary.split('\n')[0], # First line as reason
|
||||||
}
|
}
|
||||||
|
|
||||||
self._queue.put(NotificationEvent(
|
self._queue.put(NotificationEvent(
|
||||||
'system_startup', 'INFO', data, source='polling',
|
'system_startup', severity, data, source='polling',
|
||||||
entity='node', entity_id='',
|
entity='node', entity_id='',
|
||||||
))
|
))
|
||||||
|
|
||||||
@@ -2500,7 +2538,7 @@ class PollingCollector:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[PollingCollector] AI model check failed: {e}")
|
print(f"[PollingCollector] AI model check failed: {e}")
|
||||||
|
|
||||||
# ── Persistence helpers ────────────────────────────────────
|
# ── Persistence helpers ────────────────────────────────────
|
||||||
|
|
||||||
def _load_last_notified(self):
|
def _load_last_notified(self):
|
||||||
"""Load per-error notification timestamps from DB on startup."""
|
"""Load per-error notification timestamps from DB on startup."""
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ import socket
|
|||||||
import time
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import urllib.error
|
import urllib.error
|
||||||
from typing import Dict, Any, Optional, List
|
from typing import Dict, Any, Optional, List, Tuple
|
||||||
|
|
||||||
|
|
||||||
# ─── vzdump message parser ───────────────────────────────────────
|
# ─── vzdump message parser ───────────────────────────────────────
|
||||||
@@ -314,6 +314,90 @@ def _format_vzdump_body(parsed: Dict[str, Any], is_success: bool) -> str:
|
|||||||
return '\n'.join(parts)
|
return '\n'.join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_system_startup(data: Dict[str, Any]) -> Tuple[str, str]:
|
||||||
|
"""
|
||||||
|
Format comprehensive system startup report.
|
||||||
|
|
||||||
|
Returns (title, body) tuple for the notification.
|
||||||
|
Handles both simple startups (all OK) and those with issues.
|
||||||
|
"""
|
||||||
|
hostname = data.get('hostname', 'unknown')
|
||||||
|
has_issues = data.get('has_issues', False)
|
||||||
|
|
||||||
|
# Build title
|
||||||
|
if has_issues:
|
||||||
|
total_issues = (
|
||||||
|
data.get('total_failed', 0) +
|
||||||
|
len(data.get('services_failed', [])) +
|
||||||
|
len(data.get('storage_unavailable', []))
|
||||||
|
)
|
||||||
|
title = f"{hostname}: System startup - {total_issues} issue(s) detected"
|
||||||
|
else:
|
||||||
|
title = f"{hostname}: System startup completed"
|
||||||
|
|
||||||
|
# Build body
|
||||||
|
parts = []
|
||||||
|
|
||||||
|
# Overall status
|
||||||
|
if not has_issues:
|
||||||
|
parts.append("All systems operational.")
|
||||||
|
|
||||||
|
# VMs/CTs started
|
||||||
|
vms_ok = len(data.get('vms_started', []))
|
||||||
|
cts_ok = len(data.get('cts_started', []))
|
||||||
|
if vms_ok or cts_ok:
|
||||||
|
count_parts = []
|
||||||
|
if vms_ok:
|
||||||
|
count_parts.append(f"{vms_ok} VM{'s' if vms_ok > 1 else ''}")
|
||||||
|
if cts_ok:
|
||||||
|
count_parts.append(f"{cts_ok} CT{'s' if cts_ok > 1 else ''}")
|
||||||
|
|
||||||
|
# List names (up to 5)
|
||||||
|
names = []
|
||||||
|
for vm in data.get('vms_started', [])[:3]:
|
||||||
|
names.append(f"{vm['name']} ({vm['vmid']})")
|
||||||
|
for ct in data.get('cts_started', [])[:3]:
|
||||||
|
names.append(f"{ct['name']} ({ct['vmid']})")
|
||||||
|
|
||||||
|
line = f"\u2705 {' and '.join(count_parts)} started"
|
||||||
|
if names:
|
||||||
|
if len(names) <= 5:
|
||||||
|
line += f": {', '.join(names)}"
|
||||||
|
else:
|
||||||
|
line += f": {', '.join(names[:5])}..."
|
||||||
|
parts.append(line)
|
||||||
|
|
||||||
|
# Failed VMs/CTs
|
||||||
|
for vm in data.get('vms_failed', []):
|
||||||
|
reason = vm.get('reason', 'unknown error')
|
||||||
|
parts.append(f"\u274C VM failed: {vm['name']} - {reason}")
|
||||||
|
|
||||||
|
for ct in data.get('cts_failed', []):
|
||||||
|
reason = ct.get('reason', 'unknown error')
|
||||||
|
parts.append(f"\u274C CT failed: {ct['name']} - {reason}")
|
||||||
|
|
||||||
|
# Storage issues
|
||||||
|
storage_unavailable = data.get('storage_unavailable', [])
|
||||||
|
if storage_unavailable:
|
||||||
|
names = [s['name'] for s in storage_unavailable[:3]]
|
||||||
|
parts.append(f"\u26A0\uFE0F Storage: {len(storage_unavailable)} unavailable ({', '.join(names)})")
|
||||||
|
|
||||||
|
# Service issues
|
||||||
|
services_failed = data.get('services_failed', [])
|
||||||
|
if services_failed:
|
||||||
|
names = [s['name'] for s in services_failed[:3]]
|
||||||
|
parts.append(f"\u26A0\uFE0F Services: {len(services_failed)} failed ({', '.join(names)})")
|
||||||
|
|
||||||
|
# Startup duration
|
||||||
|
duration = data.get('startup_duration_seconds', 0)
|
||||||
|
if duration:
|
||||||
|
minutes = int(duration // 60)
|
||||||
|
parts.append(f"\u23F1\uFE0F Startup completed in {minutes} min")
|
||||||
|
|
||||||
|
body = '\n'.join(parts)
|
||||||
|
return title, body
|
||||||
|
|
||||||
|
|
||||||
# ─── Severity Icons ──────────────────────────────────────────────
|
# ─── Severity Icons ──────────────────────────────────────────────
|
||||||
|
|
||||||
SEVERITY_ICONS = {
|
SEVERITY_ICONS = {
|
||||||
@@ -645,11 +729,12 @@ TEMPLATES = {
|
|||||||
|
|
||||||
# ── Services events ──
|
# ── Services events ──
|
||||||
'system_startup': {
|
'system_startup': {
|
||||||
'title': '{hostname}: System startup — {summary}',
|
'title': '{hostname}: {reason}',
|
||||||
'body': 'System startup completed.\n{summary}\n\nGuests: {entity_list}',
|
'body': '{summary}',
|
||||||
'label': 'System startup',
|
'label': 'System startup report',
|
||||||
'group': 'services',
|
'group': 'services',
|
||||||
'default_enabled': True,
|
'default_enabled': True,
|
||||||
|
'formatter': '_format_system_startup',
|
||||||
},
|
},
|
||||||
'system_shutdown': {
|
'system_shutdown': {
|
||||||
'title': '{hostname}: System shutting down',
|
'title': '{hostname}: System shutting down',
|
||||||
@@ -959,7 +1044,19 @@ def render_template(event_type: str, data: Dict[str, Any]) -> Dict[str, Any]:
|
|||||||
pve_message = data.get('pve_message', '')
|
pve_message = data.get('pve_message', '')
|
||||||
pve_title = data.get('pve_title', '')
|
pve_title = data.get('pve_title', '')
|
||||||
|
|
||||||
if event_type in ('backup_complete', 'backup_fail') and pve_message:
|
# Check for custom formatter function
|
||||||
|
formatter_name = template.get('formatter')
|
||||||
|
if formatter_name and formatter_name in globals():
|
||||||
|
formatter_func = globals()[formatter_name]
|
||||||
|
try:
|
||||||
|
title, body_text = formatter_func(data)
|
||||||
|
except Exception:
|
||||||
|
# Fallback to standard formatting if formatter fails
|
||||||
|
try:
|
||||||
|
body_text = template['body'].format(**variables)
|
||||||
|
except (KeyError, ValueError):
|
||||||
|
body_text = template['body']
|
||||||
|
elif event_type in ('backup_complete', 'backup_fail') and pve_message:
|
||||||
parsed = _parse_vzdump_message(pve_message)
|
parsed = _parse_vzdump_message(pve_message)
|
||||||
if parsed:
|
if parsed:
|
||||||
is_success = (event_type == 'backup_complete')
|
is_success = (event_type == 'backup_complete')
|
||||||
@@ -1288,134 +1385,165 @@ AI_DETAIL_TOKENS = {
|
|||||||
# System prompt template - informative, no recommendations
|
# System prompt template - informative, no recommendations
|
||||||
AI_SYSTEM_PROMPT = """You are a system notification formatter for ProxMenux Monitor, a Proxmox VE monitoring tool.
|
AI_SYSTEM_PROMPT = """You are a system notification formatter for ProxMenux Monitor, a Proxmox VE monitoring tool.
|
||||||
|
|
||||||
Your task is to translate and reformat incoming server alert messages into {language}.
|
Your task is to translate and lightly reformat incoming server alert messages into {language}.
|
||||||
|
|
||||||
|
═══ CORE ROLE ═══
|
||||||
|
You are a formatter, not an analyst.
|
||||||
|
Translate, clean, and present the message clearly.
|
||||||
|
Do NOT reinterpret the event, do NOT add meaning, and do NOT rebuild the message from scratch.
|
||||||
|
|
||||||
═══ ABSOLUTE RULES ═══
|
═══ ABSOLUTE RULES ═══
|
||||||
1. Translate BOTH title and body to {language}. Every word, label, and unit must be in {language}.
|
1. Translate BOTH title and body into {language}.
|
||||||
2. NO markdown: no **bold**, no *italic*, no `code`, no headers (#), no bullet lists (- or *)
|
|
||||||
3. Plain text only — the output is sent to chat apps and email which handle their own formatting
|
|
||||||
4. Tone: factual, concise, technical. No greetings, no closings, no apologies
|
|
||||||
5. DO NOT add recommendations, action items, or suggestions ("you should…", "consider…")
|
|
||||||
6. Present ONLY the facts already in the input — do not invent or assume information
|
|
||||||
7. OUTPUT ONLY THE FINAL RESULT — never include both original and processed versions.
|
|
||||||
Do NOT append "Original message:", "Original:", "Source:", or any before/after comparison.
|
|
||||||
Return ONLY the single, final formatted message in {language}.
|
|
||||||
8. PLAIN NARRATIVE LINES — if a line in the input is a complete sentence (not a "Label: value"
|
|
||||||
pair), translate it as-is. Never prepend "Message:", "Note:", or any other label to a sentence.
|
|
||||||
9. Detail level to apply: {detail_level}
|
|
||||||
- brief → 2-3 lines, essential data only (status + key metric)
|
|
||||||
- standard → short paragraph covering who/what/where and the key value
|
|
||||||
- detailed → full technical breakdown of all available fields
|
|
||||||
10. Keep the "hostname: " prefix in the title. Translate only the descriptive part.
|
|
||||||
Example: "pve01: Updates available" → "pve01: Actualizaciones disponibles"
|
|
||||||
11. EMPTY LIST VALUES — if a list field is empty, "none", or "0":
|
|
||||||
Always write the translated word for "none" on the line after the label, never leave it blank.
|
|
||||||
Example: 🗂️ Important packages:\\n• none
|
|
||||||
Example (Spanish): 🗂️ Paquetes importantes:\\n• ninguno
|
|
||||||
Example (Français): 🗂️ Paquets importants:\\n• aucun
|
|
||||||
12. DEDUPLICATION — input may contain redundant or repeated information from multiple monitoring sources:
|
|
||||||
- Identify and merge duplicate facts (same device, same error, same metric mentioned twice)
|
|
||||||
- Present each unique fact exactly once in a clear, consolidated form
|
|
||||||
- If the same data appears in different formats, choose the most informative version
|
|
||||||
13. PROXMOX CONTEXT — silently translate Proxmox technical references into plain language.
|
|
||||||
Never explain what the term means — just use the human-readable equivalent directly.
|
|
||||||
|
|
||||||
Service / process name mapping (replace the raw name with the friendly form):
|
2. Translate human-readable text only.
|
||||||
- "pve-container@XXXX.service" → "Container CT XXXX"
|
Do NOT translate:
|
||||||
- "qemu-server@XXXX.service" → "Virtual Machine VM XXXX"
|
- hostnames
|
||||||
- "pvesr-XXXX" → "storage replication job for XXXX"
|
- device paths (/dev/sdX, /dev/nvmeXnX)
|
||||||
- "vzdump" → "backup process"
|
- filesystem paths
|
||||||
- "pveproxy" → "Proxmox web proxy"
|
- IDs, VMIDs, CTIDs, UUIDs
|
||||||
- "pvedaemon" → "Proxmox daemon"
|
- timestamps, dates, archive names, PBS paths
|
||||||
- "pvestatd" → "Proxmox statistics service"
|
- version numbers
|
||||||
- "pvescheduler" → "Proxmox task scheduler"
|
- technical units (B, KB, MB, GB, TB, KiB, MiB, GiB, TiB, %, ms, s)
|
||||||
- "pve-cluster" → "Proxmox cluster service"
|
|
||||||
- "corosync" → "cluster communication service"
|
|
||||||
- "ceph-osd@N" → "Ceph storage disk N"
|
|
||||||
- "ceph-mon" → "Ceph monitor service"
|
|
||||||
|
|
||||||
systemd message patterns (rewrite the whole phrase, not just the service name):
|
3. Plain text only.
|
||||||
- "systemd[1]: pve-container@9000.service: Failed"
|
No markdown: no **bold**, no *italic*, no `code`, no headers (#), no markdown lists (- or *).
|
||||||
→ "Container CT 9000 service failed"
|
The bullet character "•" is allowed only where explicitly required.
|
||||||
- "systemd[1]: qemu-server@100.service: Failed with result 'exit-code'"
|
|
||||||
→ "Virtual Machine VM 100 failed to start"
|
|
||||||
- "systemd[1]: Started pve-container@9000.service"
|
|
||||||
→ "Container CT 9000 started"
|
|
||||||
|
|
||||||
ATA / SMART / kernel error patterns (replace raw kernel log with plain description):
|
4. Tone: factual, concise, technical.
|
||||||
- "ata8.00: exception Emask 0x1 SAct 0x4ce0 SErr 0x40000 action 0x0"
|
No greetings, no closings, no apologies, no conversational filler.
|
||||||
→ "ATA controller error on port 8"
|
|
||||||
- "blk_update_request: I/O error, dev sdX, sector NNNN"
|
5. Do NOT add recommendations, action items, remediation, or suggestions.
|
||||||
→ "I/O error on disk /dev/sdX at sector NNNN"
|
|
||||||
- "SCSI error: return code = 0x08000002"
|
6. Present ONLY the facts already present in the input.
|
||||||
→ "SCSI communication error"
|
Do NOT invent, assume, explain, soften, or escalate anything.
|
||||||
|
|
||||||
|
7. Do NOT change severity or status meaning.
|
||||||
|
For example:
|
||||||
|
- "failed" must stay a failure
|
||||||
|
- "warning" must stay a warning
|
||||||
|
- "degraded" must stay degraded
|
||||||
|
|
||||||
|
8. Preserve structure whenever possible.
|
||||||
|
Keep the same fields, lines, and data already present in the input.
|
||||||
|
Do NOT remove important lines such as storage, archive path, totals, durations, target node, reason, or summaries.
|
||||||
|
|
||||||
|
9. Reordering must be minimal.
|
||||||
|
Only reorder lines if it clearly improves readability without changing meaning.
|
||||||
|
|
||||||
|
10. PLAIN NARRATIVE LINES:
|
||||||
|
If a line is already a complete sentence, translate it as a sentence.
|
||||||
|
Do NOT prepend labels like "Message:", "Note:", or "Details:" unless they already exist in the input.
|
||||||
|
|
||||||
|
11. Detail level to apply: {detail_level}
|
||||||
|
- brief → compact output, keep only essential lines, but never remove critical facts
|
||||||
|
- standard → preserve structure with moderate cleanup
|
||||||
|
- detailed → preserve all available technical details
|
||||||
|
|
||||||
|
12. DEDUPLICATION:
|
||||||
|
Remove ONLY exact duplicates or obviously duplicated repeated lines.
|
||||||
|
Do NOT merge distinct facts just because they look similar.
|
||||||
|
Do NOT summarize multiple separate events into one.
|
||||||
|
|
||||||
|
13. Keep the "hostname: " prefix in the title.
|
||||||
|
Translate only the descriptive part.
|
||||||
|
Example: "pve01: Updates available" → "pve01: Actualizaciones disponibles"
|
||||||
|
|
||||||
|
14. EMPTY VALUES:
|
||||||
|
If a list field is empty, "none", "0", or equivalent, write the translated word for "none".
|
||||||
|
Never leave a declared field blank.
|
||||||
|
|
||||||
|
15. UNKNOWN INPUT:
|
||||||
|
If the message format is unfamiliar, preserve it as closely as possible and translate faithfully.
|
||||||
|
Do NOT force it into another template.
|
||||||
|
|
||||||
|
═══ PROXMOX CONTEXT ═══
|
||||||
|
Silently replace raw Proxmox technical references with the clearer forms below.
|
||||||
|
Do NOT explain them. Just use the friendly equivalent directly.
|
||||||
|
|
||||||
|
Service / process mappings:
|
||||||
|
- "pve-container@XXXX.service" → "Container CT XXXX"
|
||||||
|
- "qemu-server@XXXX.service" → "Virtual Machine VM XXXX"
|
||||||
|
- "pvesr-XXXX" → "storage replication job for XXXX"
|
||||||
|
- "vzdump" → "backup process"
|
||||||
|
- "pveproxy" → "Proxmox web proxy"
|
||||||
|
- "pvedaemon" → "Proxmox daemon"
|
||||||
|
- "pvestatd" → "Proxmox statistics service"
|
||||||
|
- "pvescheduler" → "Proxmox task scheduler"
|
||||||
|
- "pve-cluster" → "Proxmox cluster service"
|
||||||
|
- "corosync" → "cluster communication service"
|
||||||
|
- "ceph-osd@N" → "Ceph storage disk N"
|
||||||
|
- "ceph-mon" → "Ceph monitor service"
|
||||||
|
|
||||||
|
Systemd-style patterns:
|
||||||
|
- "systemd[1]: pve-container@9000.service: Failed"
|
||||||
|
→ "Container CT 9000 service failed"
|
||||||
|
- "systemd[1]: qemu-server@100.service: Failed with result 'exit-code'"
|
||||||
|
→ "Virtual Machine VM 100 failed to start"
|
||||||
|
- "systemd[1]: Started pve-container@9000.service"
|
||||||
|
→ "Container CT 9000 started"
|
||||||
|
|
||||||
|
Kernel / storage patterns:
|
||||||
|
- "ata8.00: exception Emask ..."
|
||||||
|
→ "ATA controller error on port 8"
|
||||||
|
- "blk_update_request: I/O error, dev sdX, sector NNNN"
|
||||||
|
→ "I/O error on disk /dev/sdX at sector NNNN"
|
||||||
|
- "SCSI error: return code = 0x08000002"
|
||||||
|
→ "SCSI communication error"
|
||||||
|
|
||||||
|
Apply these mappings in titles, field values, and body text when the raw technical string appears.
|
||||||
|
|
||||||
Apply these mappings everywhere: in the body narrative, in field values, and when
|
|
||||||
the raw technical string appears inside a longer sentence.
|
|
||||||
{emoji_instructions}
|
{emoji_instructions}
|
||||||
|
|
||||||
═══ MESSAGE TYPES — FORMAT RULES ═══
|
═══ MESSAGE-TYPE GUIDANCE ═══
|
||||||
|
|
||||||
BACKUP (backup_complete / backup_fail / backup_start):
|
BACKUP (backup_complete / backup_fail / backup_start):
|
||||||
Input contains: VM/CT names, IDs, size, duration, storage location, status per VM
|
- Preserve per-VM / per-CT detail if present.
|
||||||
Output body: first line is plain text (no emoji) describing the event briefly.
|
- Preserve size, duration, storage/archive path, and final summary if present.
|
||||||
Then list each VM/CT with its fields. End with a summary line.
|
- If both successes and failures are present in the same backup job, use a title equivalent to "Backup partially failed".
|
||||||
PARTIAL FAILURE RULE: if some VMs succeeded and at least one failed, use a combined title
|
- Do NOT collapse multi-guest backup results into a single generic sentence.
|
||||||
like "Backup partially failed" / "Copia de seguridad parcialmente fallida" — never say
|
|
||||||
"backup failed" when there are also successful VMs in the same job.
|
|
||||||
NEVER omit the storage/archive line or the summary line — always include them even for long jobs.
|
|
||||||
|
|
||||||
UPDATES (update_summary):
|
UPDATES (update_summary):
|
||||||
- Each count on its own line with its label.
|
- Keep each count on its own line.
|
||||||
- Package list uses "• " (bullet + space) per package, NOT the 🗂️ emoji on each line.
|
- Keep the important packages block if present.
|
||||||
- The 🗂️ emoji goes only on the "Important packages:" header line.
|
- Use "• " for package items.
|
||||||
- NEVER add a redundant summary line repeating the total count.
|
- Do NOT add a redundant summary line repeating totals already shown.
|
||||||
|
|
||||||
PVE UPDATE (pve_update):
|
|
||||||
- First line: plain sentence announcing the new version (no emoji on this line).
|
|
||||||
- Blank line after intro.
|
|
||||||
- Current version: 🔹 prefix | New version: 🟢 prefix
|
|
||||||
- Blank line before packages block.
|
|
||||||
- Packages header: 🗂️ | Package lines: 📌 prefix with version arrow v{{old}} ➜ v{{new}}
|
|
||||||
|
|
||||||
DISK / SMART ERRORS (disk_io_error / storage_unavailable):
|
PVE UPDATE (pve_update):
|
||||||
Input contains: device name, error type, SMART values or I/O error codes
|
- Preserve current version, new version, and package list if present.
|
||||||
Output body: device, then the specific error or failing attribute
|
- Keep the announcement concise.
|
||||||
DEDUPLICATION: Input may contain repeated or similar information from multiple sources.
|
|
||||||
If you see the same device, error count, or technical details mentioned multiple times,
|
DISK / SMART / STORAGE (disk_io_error / storage_unavailable):
|
||||||
consolidate them into a single, clear statement. Never repeat the same information twice.
|
- Preserve device, specific error, failing attribute, and counts if present.
|
||||||
|
- Do NOT repeat the same disk fact twice.
|
||||||
|
|
||||||
RESOURCES (cpu_high / ram_high / temp_high / load_high):
|
RESOURCES (cpu_high / ram_high / temp_high / load_high):
|
||||||
Input contains: current value, threshold, core count
|
- Preserve current value, threshold, and context if present.
|
||||||
Output: current value vs threshold, context if available
|
|
||||||
|
|
||||||
SECURITY (auth_fail / ip_block):
|
SECURITY (auth_fail / ip_block):
|
||||||
Input contains: source IP, user, service, jail, failure count
|
- Keep source IP, user, service, jail, and failure count on separate clear lines if present.
|
||||||
Output: list each field on its own line
|
|
||||||
|
|
||||||
VM/CT LIFECYCLE (vm_start, vm_stop, vm_fail, ct_*, migration_*, replication_*):
|
VM / CT LIFECYCLE (vm_*, ct_*, migration_*, replication_*):
|
||||||
Input contains: VM name, ID, target node (migrations), reason (failures)
|
- Keep name, ID, state, reason, and target node if present.
|
||||||
Output: one or two lines confirming the event with key facts
|
- Keep lifecycle messages compact unless detail_level is detailed.
|
||||||
|
|
||||||
CLUSTER (split_brain / node_disconnect / node_reconnect):
|
CLUSTER / HEALTH:
|
||||||
Input: node name, quorum status
|
- Preserve node name, quorum, category, severity, duration, and reason if present.
|
||||||
Output: state change + quorum value
|
|
||||||
|
|
||||||
HEALTH (new_error / error_resolved / health_persistent / health_degraded):
|
═══ OUTPUT FORMAT ═══
|
||||||
Input: category, severity, duration, reason
|
|
||||||
Output: what changed, in which category, for how long (if resolved)
|
|
||||||
|
|
||||||
CRITICAL:
|
|
||||||
- [TITLE] on its own line, title text on the very next line — no blank line between them
|
|
||||||
- [BODY] on its own line, body text starting on the very next line — no blank line between them
|
|
||||||
- Do NOT write "Title:", "Body:", or any label substituting the markers
|
|
||||||
- Do NOT include the literal words TITLE or BODY anywhere in the translated content
|
|
||||||
|
|
||||||
═══ OUTPUT FORMAT (follow exactly — parsers rely on these markers) ═══
|
|
||||||
[TITLE]
|
[TITLE]
|
||||||
translated title here
|
translated title here
|
||||||
[BODY]
|
[BODY]
|
||||||
translated body here"""
|
translated body here
|
||||||
|
|
||||||
|
CRITICAL OUTPUT RULES:
|
||||||
|
- Write [TITLE] on its own line
|
||||||
|
- Write the title on the next line
|
||||||
|
- Write [BODY] on its own line
|
||||||
|
- Write the body starting on the next line
|
||||||
|
- Do NOT replace these markers with "Title:" or "Body:"
|
||||||
|
- Do NOT include any extra text before or after the formatted result
|
||||||
|
- Do NOT add blank lines between [TITLE] and the title
|
||||||
|
- Do NOT add blank lines between [BODY] and the first body line"""
|
||||||
|
|
||||||
# Emoji instructions injected into AI_SYSTEM_PROMPT for rich channels (Telegram, Discord, Pushover)
|
# Emoji instructions injected into AI_SYSTEM_PROMPT for rich channels (Telegram, Discord, Pushover)
|
||||||
AI_EMOJI_INSTRUCTIONS = """
|
AI_EMOJI_INSTRUCTIONS = """
|
||||||
@@ -1485,135 +1613,10 @@ A blank line must be completely empty — no emoji, no spaces.
|
|||||||
🟢 new version (pve_update)
|
🟢 new version (pve_update)
|
||||||
|
|
||||||
|
|
||||||
BLANK LINES FOR READABILITY — insert ONE blank line between logical sections within the body.
|
BLANK LINES:
|
||||||
Blank lines go BETWEEN groups, not before the first line or after the last line.
|
Insert one blank line only between logical sections inside the body.
|
||||||
A blank line must be completely empty — no emoji, no spaces.
|
Do not add a blank line before the first body line or after the last one.
|
||||||
|
"""
|
||||||
When to add a blank line:
|
|
||||||
- Updates: after the last count line, before the packages block
|
|
||||||
- Backup multi-VM: one blank line between each VM entry; one blank line before the summary line
|
|
||||||
- Disk/SMART errors: after the device line, before the error description lines
|
|
||||||
- VM events with a reason: after the main status line, before Reason / Node / Target lines
|
|
||||||
- Health events: after the category/status line, before duration or detail lines
|
|
||||||
|
|
||||||
EXAMPLE — CT shutdown:
|
|
||||||
[TITLE]
|
|
||||||
🔽 amd: CT alpine (101) shut down
|
|
||||||
[BODY]
|
|
||||||
🏷️ Container alpine (ID: 101)
|
|
||||||
✔️ Cleanly shut down
|
|
||||||
|
|
||||||
EXAMPLE — VM started:
|
|
||||||
[TITLE]
|
|
||||||
🚀 pve01: VM arch-linux (100) started
|
|
||||||
[BODY]
|
|
||||||
🏷️ Virtual machine arch-linux (ID: 100)
|
|
||||||
✔️ Now running
|
|
||||||
|
|
||||||
EXAMPLE — migration complete:
|
|
||||||
[TITLE]
|
|
||||||
🚚 amd: Migration complete — web01 (100)
|
|
||||||
[BODY]
|
|
||||||
🏷️ Virtual machine web01 (ID: 100)
|
|
||||||
✔️ Successfully migrated
|
|
||||||
|
|
||||||
🎯 Target: node02
|
|
||||||
|
|
||||||
EXAMPLE — updates message (no important packages):
|
|
||||||
[TITLE]
|
|
||||||
📦 amd: Updates available
|
|
||||||
[BODY]
|
|
||||||
📦 Total updates: 24
|
|
||||||
🔒 Security updates: 6
|
|
||||||
🔄 Proxmox updates: 0
|
|
||||||
⚙️ Kernel updates: 0
|
|
||||||
|
|
||||||
🗂️ Important packages:
|
|
||||||
• none
|
|
||||||
|
|
||||||
EXAMPLE — updates message (with important packages):
|
|
||||||
[TITLE]
|
|
||||||
📦 amd: Updates available
|
|
||||||
[BODY]
|
|
||||||
📦 Total updates: 90
|
|
||||||
🔒 Security updates: 6
|
|
||||||
🔄 Proxmox updates: 14
|
|
||||||
⚙️ Kernel updates: 1
|
|
||||||
|
|
||||||
🗂️ Important packages:
|
|
||||||
• pve-manager (9.1.4 -> 9.1.6)
|
|
||||||
• qemu-server (9.1.3 -> 9.1.4)
|
|
||||||
• pve-container (6.0.18 -> 6.1.2)
|
|
||||||
|
|
||||||
EXAMPLE — pve_update (new Proxmox VE version):
|
|
||||||
[TITLE]
|
|
||||||
🆕 pve01: Proxmox VE 9.1.6 available
|
|
||||||
[BODY]
|
|
||||||
🚀 A new Proxmox VE release is available.
|
|
||||||
|
|
||||||
🔹 Current: 9.1.4
|
|
||||||
🟢 New: 9.1.6
|
|
||||||
|
|
||||||
🗂️ Important packages:
|
|
||||||
📌 pve-manager (v9.1.4 ➜ v9.1.6)
|
|
||||||
|
|
||||||
EXAMPLE — backup complete with multiple VMs:
|
|
||||||
[TITLE]
|
|
||||||
💾✅ pve01: Backup complete
|
|
||||||
[BODY]
|
|
||||||
Backup job finished on storage local-bak.
|
|
||||||
|
|
||||||
🏷️ VM web01 (ID: 100)
|
|
||||||
✔️ Status: ok
|
|
||||||
💽 Size: 12.3 GiB
|
|
||||||
⏱️ Duration: 00:04:21
|
|
||||||
🗄️ Storage: vm/100/2026-03-17T22:00:08Z
|
|
||||||
|
|
||||||
🏷️ CT db (ID: 101)
|
|
||||||
✔️ Status: ok
|
|
||||||
💽 Size: 4.1 GiB
|
|
||||||
⏱️ Duration: 00:01:10
|
|
||||||
🗄️ Storage: ct/101/2026-03-17T22:04:29Z
|
|
||||||
|
|
||||||
📊 Total: 2 backups | 💾 16.4 GiB | ⏱️ 00:05:31
|
|
||||||
|
|
||||||
EXAMPLE — backup partially failed (some ok, some failed):
|
|
||||||
[TITLE]
|
|
||||||
💾❌ pve01: Backup partially failed
|
|
||||||
[BODY]
|
|
||||||
Backup job finished with errors on storage PBS2.
|
|
||||||
|
|
||||||
🏷️ VM web01 (ID: 100)
|
|
||||||
✔️ Status: ok
|
|
||||||
💽 Size: 12.3 GiB
|
|
||||||
⏱️ Duration: 00:04:21
|
|
||||||
🗄️ Storage: vm/100/2026-03-17T22:00:08Z
|
|
||||||
|
|
||||||
🏷️ VM broken (ID: 102)
|
|
||||||
❌ Status: error
|
|
||||||
💽 Size: 0 B
|
|
||||||
⏱️ Duration: 00:00:37
|
|
||||||
|
|
||||||
📊 Total: 2 backups | ❌ 1 failed | 💾 12.3 GiB | ⏱️ 00:04:58
|
|
||||||
|
|
||||||
EXAMPLE — disk I/O health warning:
|
|
||||||
[TITLE]
|
|
||||||
💥 amd: Health warning — Disk I/O errors
|
|
||||||
[BODY]
|
|
||||||
💿 Device: /dev/sda
|
|
||||||
|
|
||||||
⚠️ 1 sector currently unreadable (pending)
|
|
||||||
📝 Disk reports sectors in pending reallocation state
|
|
||||||
|
|
||||||
EXAMPLE — health degraded (multiple issues):
|
|
||||||
[TITLE]
|
|
||||||
⚠️ amd: 2 health checks degraded
|
|
||||||
[BODY]
|
|
||||||
💥 Disk I/O error on /dev/sda: 1 sector currently unreadable (pending)
|
|
||||||
|
|
||||||
🏷️ Container CT 9005: ❌ failed to start
|
|
||||||
🏷️ Container CT 9004: ❌ failed to start
|
|
||||||
🏷️ Container CT 9002: ❌ failed to start"""
|
|
||||||
|
|
||||||
|
|
||||||
# No emoji instructions for email/plain text channels
|
# No emoji instructions for email/plain text channels
|
||||||
|
|||||||
@@ -120,7 +120,7 @@ class _StartupGraceState:
|
|||||||
with self._lock:
|
with self._lock:
|
||||||
return time.time() - self._startup_time
|
return time.time() - self._startup_time
|
||||||
|
|
||||||
# ─── Shutdown Tracking ───────────────────────────────────────────────────
|
# ─── Shutdown Tracking ───────────────────────────────────────────────────
|
||||||
|
|
||||||
def mark_shutdown(self):
|
def mark_shutdown(self):
|
||||||
"""
|
"""
|
||||||
@@ -231,6 +231,219 @@ def was_startup_aggregated() -> bool:
|
|||||||
return _state.was_startup_aggregated()
|
return _state.was_startup_aggregated()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Startup Report Collection ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _check_reason(check: dict) -> str:
    """Return a check's explanatory text, preferring 'reason' over 'message'.

    Falls back to 'message' when 'reason' is missing, empty or None, and
    always returns a string so callers can safely call str methods on it.
    """
    return str(check.get('reason') or check.get('message') or '')


def _failing_checks(category: dict, default_name: str = 'unknown'):
    """Yield (name, reason) for each check of *category* in a problem state.

    A check counts as failing when its 'status' is 'CRITICAL', 'WARNING'
    or 'error'. *default_name* is used when a check carries no 'name'.
    """
    for check in category.get('checks', []):
        if check.get('status') in ('CRITICAL', 'WARNING', 'error'):
            yield check.get('name', default_name), _check_reason(check)


def _is_container_check(check_name: str) -> bool:
    """Tell whether a health-check name refers to a container (CT).

    'CT' only counts when it is not embedded in a longer word, so a VM
    called e.g. "ACTIVE-DIRECTORY" is not misfiled as a container, while
    "CT 9005", "CT9005" and "Container ..." still match.
    """
    import re

    pattern = r'(?<![A-Za-z])CT(?![A-Za-z])|Container'
    return re.search(pattern, check_name) is not None


def _merge_health_data(report: dict, health_data: dict) -> None:
    """Fold a health_monitor status dict into *report* (mutated in place).

    NOTE(review): the schema ('overall_status', 'categories' -> 'status',
    'reason', 'checks') is taken from the keys read here — confirm against
    health_monitor.get_detailed_status().
    """
    report['health_status'] = health_data.get('overall_status', 'UNKNOWN')
    categories = health_data.get('categories') or {}

    # Storage and services share the same shape: a category-level status
    # flips the *_ok flag, and each failing check is listed individually.
    for cat_key, ok_key, list_key in (
        ('storage', 'storage_ok', 'storage_unavailable'),
        ('services', 'services_ok', 'services_failed'),
    ):
        category = categories.get(cat_key, {})
        if category.get('status') in ('CRITICAL', 'WARNING'):
            report[ok_key] = False
            for name, reason in _failing_checks(category):
                report[list_key].append({'name': name, 'reason': reason})

    # Guests that failed to start are recognised by the reason text
    # (Spanish or English wording) and split into VMs vs. containers.
    for name, reason in _failing_checks(categories.get('vms', {}), default_name=''):
        lowered = reason.lower()
        if 'error al iniciar' in lowered or 'failed to start' in lowered:
            bucket = 'cts_failed' if _is_container_check(name) else 'vms_failed'
            report[bucket].append({'name': name, 'reason': reason})

    # One summary entry per degraded category.
    for cat_name, cat_data in categories.items():
        if cat_data.get('status') in ('CRITICAL', 'WARNING'):
            report['health_issues'].append({
                'category': cat_name,
                'status': cat_data.get('status'),
                'reason': cat_data.get('reason', ''),
            })


def collect_startup_report() -> dict:
    """
    Collect comprehensive startup report data.

    Called at the end of the grace period to generate a complete
    startup report including:
    - VMs/CTs that started successfully
    - VMs/CTs that failed to start
    - Service status
    - Storage status
    - Journal errors during boot (for AI enrichment)

    Collection is best-effort: a failure while reading health data or the
    journal is recorded in '_startup_errors' instead of being raised, and
    whatever was gathered up to that point is kept.

    Returns:
        Dictionary with startup report data
    """
    import subprocess

    report = {
        # VMs/CTs
        'vms_started': [],
        'cts_started': [],
        'vms_failed': [],
        'cts_failed': [],

        # System status
        'services_ok': True,
        'services_failed': [],
        'storage_ok': True,
        'storage_unavailable': [],

        # Health summary
        'health_status': 'OK',
        'health_issues': [],

        # For AI enrichment
        '_journal_context': '',
        '_startup_errors': [],

        # Metadata
        'startup_duration_seconds': get_startup_elapsed(),
        'timestamp': int(time.time()),
    }

    # Get VMs/CTs that started during boot.
    # NOTE(review): per its name the getter also clears the collected
    # list, so this function should be called only once — confirm.
    for vmid, vmname, vm_type in get_and_clear_startup_vms():
        bucket = 'vms_started' if vm_type == 'vm' else 'cts_started'
        report[bucket].append({'vmid': vmid, 'name': vmname})

    # Try to get health status from health_monitor
    try:
        import health_monitor
        health_data = health_monitor.get_detailed_status()
        if health_data:
            _merge_health_data(report, health_data)
    except Exception as e:
        report['_startup_errors'].append(f"Error getting health data: {e}")

    # Get journal errors during startup (for AI enrichment):
    # up to 50 entries of priority 'err' or worse since the recorded
    # startup time, passed to journalctl as an epoch ('@<seconds>').
    try:
        boot_time = int(_state._startup_time)
        result = subprocess.run(
            ['journalctl', '-p', 'err', '--since', f'@{boot_time}', '--no-pager', '-n', '50'],
            capture_output=True,
            text=True,
            timeout=10
        )
        if result.returncode == 0 and result.stdout.strip():
            report['_journal_context'] = result.stdout.strip()
    except Exception as e:
        report['_startup_errors'].append(f"Error getting journal: {e}")

    return report
|
||||||
|
|
||||||
|
|
||||||
|
def format_startup_summary(report: dict) -> str:
    """
    Format a human-readable startup summary from report data.

    The first line is a header: either "System startup completed" (followed
    by "All systems operational.") or "System startup - N issue(s) detected".
    N counts failed guests, failed services and unavailable storages; when
    the report is flagged only through 'health_status' or an '*_ok' flag
    and none of those lists has entries, N falls back to the number of
    'health_issues' (at least 1) so the header never claims "0 issue(s)".

    Args:
        report: Dictionary from collect_startup_report()

    Returns:
        Formatted summary string (lines joined with newlines)
    """
    vms_started = report.get('vms_started', [])
    cts_started = report.get('cts_started', [])
    vms_failed = report.get('vms_failed', [])
    cts_failed = report.get('cts_failed', [])
    services_failed = report.get('services_failed', [])
    storage_unavailable = report.get('storage_unavailable', [])
    services_ok = report.get('services_ok', True)
    storage_ok = report.get('storage_ok', True)

    # Count totals
    vms_ok = len(vms_started)
    cts_ok = len(cts_started)
    total_ok = vms_ok + cts_ok
    total_fail = len(vms_failed) + len(cts_failed)

    # Determine overall status
    has_issues = (
        total_fail > 0
        or not services_ok
        or not storage_ok
        or report.get('health_status') in ('CRITICAL', 'WARNING')
    )

    lines = []

    # Header
    if has_issues:
        issue_count = total_fail + len(services_failed) + len(storage_unavailable)
        if issue_count == 0:
            # Degraded status without itemised failures: report the
            # category-level issues instead of a contradictory zero.
            issue_count = len(report.get('health_issues', [])) or 1
        lines.append(f"System startup - {issue_count} issue(s) detected")
    else:
        lines.append("System startup completed")
        lines.append("All systems operational.")

    # VMs/CTs started
    if total_ok > 0:
        parts = []
        if vms_ok > 0:
            parts.append(f"{vms_ok} VM{'s' if vms_ok > 1 else ''}")
        if cts_ok > 0:
            parts.append(f"{cts_ok} CT{'s' if cts_ok > 1 else ''}")

        # List names (VMs first, then CTs)
        names = [f"{guest['name']} ({guest['vmid']})"
                 for guest in (*vms_started, *cts_started)]

        line = f"{' and '.join(parts)} started"
        if len(names) <= 5:
            line += f": {', '.join(names)}"
        else:
            # Keep long lists short: first three plus a remainder count.
            line += f": {', '.join(names[:3])}... (+{len(names)-3} more)"
        lines.append(line)

    # Failed VMs/CTs
    for vm in vms_failed:
        lines.append(f"VM failed: {vm['name']} - {vm.get('reason', 'unknown error')}")
    for ct in cts_failed:
        lines.append(f"CT failed: {ct['name']} - {ct.get('reason', 'unknown error')}")

    # Storage issues (at most three names are shown)
    if not storage_ok and storage_unavailable:
        names = [s['name'] for s in storage_unavailable]
        lines.append(f"Storage: {len(storage_unavailable)} unavailable ({', '.join(names[:3])})")

    # Service issues (at most three names are shown)
    if not services_ok and services_failed:
        names = [s['name'] for s in services_failed]
        lines.append(f"Services: {len(services_failed)} failed ({', '.join(names[:3])})")

    return '\n'.join(lines)
|
||||||
|
|
||||||
|
|
||||||
# ─── For backwards compatibility ─────────────────────────────────────────────
|
# ─── For backwards compatibility ─────────────────────────────────────────────
|
||||||
|
|
||||||
# Expose constants for external use
|
# Expose constants for external use
|
||||||
|
|||||||
Reference in New Issue
Block a user