mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-29 10:56:26 +00:00
Update notification service
This commit is contained in:
@@ -110,7 +110,7 @@ export function ProxmoxDashboard() {
|
|||||||
})
|
})
|
||||||
setIsServerConnected(true)
|
setIsServerConnected(true)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("[v0] Failed to fetch system data from Flask server:", error)
|
// Expected to fail in v0 preview (no Flask server)
|
||||||
|
|
||||||
setIsServerConnected(false)
|
setIsServerConnected(false)
|
||||||
setSystemStatus((prev) => ({
|
setSystemStatus((prev) => ({
|
||||||
|
|||||||
@@ -19,29 +19,19 @@ export const API_PORT = process.env.NEXT_PUBLIC_API_PORT || "8008"
|
|||||||
*/
|
*/
|
||||||
export function getApiBaseUrl(): string {
|
export function getApiBaseUrl(): string {
|
||||||
if (typeof window === "undefined") {
|
if (typeof window === "undefined") {
|
||||||
console.log("[v0] getApiBaseUrl: Running on server (SSR)")
|
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
const { protocol, hostname, port } = window.location
|
const { protocol, hostname, port } = window.location
|
||||||
|
|
||||||
console.log("[v0] getApiBaseUrl - protocol:", protocol, "hostname:", hostname, "port:", port)
|
|
||||||
|
|
||||||
// If accessing via standard ports (80/443) or no port, assume we're behind a proxy
|
// If accessing via standard ports (80/443) or no port, assume we're behind a proxy
|
||||||
// In this case, use relative URLs so the proxy handles routing
|
// In this case, use relative URLs so the proxy handles routing
|
||||||
const isStandardPort = port === "" || port === "80" || port === "443"
|
const isStandardPort = port === "" || port === "80" || port === "443"
|
||||||
|
|
||||||
console.log("[v0] getApiBaseUrl - isStandardPort:", isStandardPort)
|
|
||||||
|
|
||||||
if (isStandardPort) {
|
if (isStandardPort) {
|
||||||
// Behind a proxy - use relative URL
|
|
||||||
console.log("[v0] getApiBaseUrl: Detected proxy access, using relative URLs")
|
|
||||||
return ""
|
return ""
|
||||||
} else {
|
} else {
|
||||||
// Direct access - use explicit API port
|
return `${protocol}//${hostname}:${API_PORT}`
|
||||||
const baseUrl = `${protocol}//${hostname}:${API_PORT}`
|
|
||||||
console.log("[v0] getApiBaseUrl: Direct access detected, using:", baseUrl)
|
|
||||||
return baseUrl
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -69,12 +59,7 @@ export function getAuthToken(): string | null {
|
|||||||
if (typeof window === "undefined") {
|
if (typeof window === "undefined") {
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
const token = localStorage.getItem("proxmenux-auth-token")
|
return localStorage.getItem("proxmenux-auth-token")
|
||||||
console.log(
|
|
||||||
"[v0] getAuthToken called:",
|
|
||||||
token ? `Token found (length: ${token.length})` : "No token found in localStorage",
|
|
||||||
)
|
|
||||||
return token
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -96,31 +81,20 @@ export async function fetchApi<T>(endpoint: string, options?: RequestInit): Prom
|
|||||||
|
|
||||||
if (token) {
|
if (token) {
|
||||||
headers["Authorization"] = `Bearer ${token}`
|
headers["Authorization"] = `Bearer ${token}`
|
||||||
console.log("[v0] fetchApi:", endpoint, "- Authorization header ADDED")
|
|
||||||
} else {
|
|
||||||
console.log("[v0] fetchApi:", endpoint, "- NO TOKEN - Request will fail if endpoint is protected")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
const response = await fetch(url, {
|
||||||
const response = await fetch(url, {
|
...options,
|
||||||
...options,
|
headers,
|
||||||
headers,
|
cache: "no-store",
|
||||||
cache: "no-store",
|
})
|
||||||
})
|
|
||||||
|
|
||||||
console.log("[v0] fetchApi:", endpoint, "- Response status:", response.status)
|
if (!response.ok) {
|
||||||
|
if (response.status === 401) {
|
||||||
if (!response.ok) {
|
throw new Error(`Unauthorized: ${endpoint}`)
|
||||||
if (response.status === 401) {
|
|
||||||
console.error("[v0] fetchApi: 401 UNAUTHORIZED -", endpoint, "- Token present:", !!token)
|
|
||||||
throw new Error(`Unauthorized: ${endpoint}`)
|
|
||||||
}
|
|
||||||
throw new Error(`API request failed: ${response.status} ${response.statusText}`)
|
|
||||||
}
|
}
|
||||||
|
throw new Error(`API request failed: ${response.status} ${response.statusText}`)
|
||||||
return response.json()
|
|
||||||
} catch (error) {
|
|
||||||
console.error("[v0] fetchApi error for", endpoint, ":", error)
|
|
||||||
throw error
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return response.json()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -141,6 +141,20 @@ class HealthMonitor:
|
|||||||
r'ata\d+.*hard resetting link',
|
r'ata\d+.*hard resetting link',
|
||||||
r'ata\d+.*link is slow',
|
r'ata\d+.*link is slow',
|
||||||
r'ata\d+.*COMRESET',
|
r'ata\d+.*COMRESET',
|
||||||
|
|
||||||
|
# ── ProxMenux self-referential noise ──
|
||||||
|
# The monitor reporting its OWN service failures is circular --
|
||||||
|
# it cannot meaningfully alert about itself.
|
||||||
|
r'proxmenux-monitor\.service.*Failed',
|
||||||
|
r'proxmenux-monitor\.service.*exit-code',
|
||||||
|
r'ProxMenux-Monitor.*Failed at step EXEC',
|
||||||
|
|
||||||
|
# ── PVE scheduler operational noise ──
|
||||||
|
# pvescheduler emits "could not update job state" every minute
|
||||||
|
# when a scheduled job reference is stale. This is cosmetic,
|
||||||
|
# not a system problem.
|
||||||
|
r'pvescheduler.*could not update job state',
|
||||||
|
r'pvescheduler.*no such task',
|
||||||
]
|
]
|
||||||
|
|
||||||
CRITICAL_LOG_KEYWORDS = [
|
CRITICAL_LOG_KEYWORDS = [
|
||||||
|
|||||||
@@ -221,7 +221,28 @@ class HealthPersistence:
|
|||||||
conn.close()
|
conn.close()
|
||||||
return {'type': 'skipped_acknowledged', 'needs_notification': False}
|
return {'type': 'skipped_acknowledged', 'needs_notification': False}
|
||||||
else:
|
else:
|
||||||
# Suppression expired - reset as a NEW event
|
# Suppression expired.
|
||||||
|
# For log-based errors (spike, persistent, cascade),
|
||||||
|
# do NOT re-trigger. The journal always contains old
|
||||||
|
# messages, so re-creating the error would cause an
|
||||||
|
# infinite notification cycle. Instead, just delete
|
||||||
|
# the stale record so it stops appearing in the UI.
|
||||||
|
is_log_error = (
|
||||||
|
error_key.startswith('log_persistent_')
|
||||||
|
or error_key.startswith('log_spike_')
|
||||||
|
or error_key.startswith('log_cascade_')
|
||||||
|
or error_key.startswith('log_critical_')
|
||||||
|
or category == 'logs'
|
||||||
|
)
|
||||||
|
if is_log_error:
|
||||||
|
cursor.execute('DELETE FROM errors WHERE error_key = ?', (error_key,))
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
return {'type': 'skipped_expired_log', 'needs_notification': False}
|
||||||
|
|
||||||
|
# For non-log errors (hardware, services, etc.),
|
||||||
|
# re-triggering is correct -- the condition is real
|
||||||
|
# and still present.
|
||||||
cursor.execute('DELETE FROM errors WHERE error_key = ?', (error_key,))
|
cursor.execute('DELETE FROM errors WHERE error_key = ?', (error_key,))
|
||||||
cursor.execute('''
|
cursor.execute('''
|
||||||
INSERT INTO errors
|
INSERT INTO errors
|
||||||
|
|||||||
@@ -505,6 +505,7 @@ class JournalWatcher:
|
|||||||
r'user-runtime-dir@\d+', # User runtime dirs
|
r'user-runtime-dir@\d+', # User runtime dirs
|
||||||
r'systemd-coredump@', # Coredump handlers (transient)
|
r'systemd-coredump@', # Coredump handlers (transient)
|
||||||
r'run-.*\.mount', # Transient mounts
|
r'run-.*\.mount', # Transient mounts
|
||||||
|
r'proxmenux-monitor', # Self-referential: monitor can't alert about itself
|
||||||
]
|
]
|
||||||
for noise in _NOISE_PATTERNS:
|
for noise in _NOISE_PATTERNS:
|
||||||
if re.search(noise, msg) or re.search(noise, unit):
|
if re.search(noise, msg) or re.search(noise, unit):
|
||||||
@@ -741,17 +742,20 @@ class JournalWatcher:
|
|||||||
def _check_backup_start(self, msg: str, syslog_id: str):
|
def _check_backup_start(self, msg: str, syslog_id: str):
|
||||||
"""Detect backup job start from journal messages.
|
"""Detect backup job start from journal messages.
|
||||||
|
|
||||||
Matches multiple formats:
|
The message "starting new backup job: vzdump ..." is unique and
|
||||||
- pvedaemon: "INFO: starting new backup job: vzdump 110 --storage PBS-Cloud --mode stop ..."
|
definitive -- only a real vzdump invocation produces it. We match
|
||||||
- pvesh: "INFO: starting new backup job: vzdump 104 --mode stop --storage PBS-Cloud ..."
|
purely on message content, regardless of which service emitted it,
|
||||||
- vzdump: "starting new backup job: vzdump 110 --storage PBS-Cloud --mode stop ..."
|
because PVE uses different syslog identifiers depending on how the
|
||||||
- vzdump: "INFO: Starting Backup of VM 110 (qemu)" (per-guest fallback)
|
backup was triggered:
|
||||||
|
- pvescheduler (scheduled backups via /etc/pve/jobs.cfg)
|
||||||
|
- pvedaemon (GUI-triggered backups)
|
||||||
|
- pvesh (CLI / API-triggered backups)
|
||||||
|
- vzdump (per-guest "Starting Backup of VM ..." lines)
|
||||||
|
|
||||||
PVE emits from pvedaemon for scheduled backups, from pvesh for
|
Trying to maintain a whitelist of syslog_ids is fragile -- new PVE
|
||||||
API/CLI-triggered backups, and from vzdump for the per-guest lines.
|
versions or plugins may introduce more. The message pattern itself
|
||||||
|
is the reliable indicator.
|
||||||
"""
|
"""
|
||||||
if syslog_id not in ('pvedaemon', 'pvesh', 'vzdump', ''):
|
|
||||||
return
|
|
||||||
|
|
||||||
# Primary pattern: full vzdump command with all arguments
|
# Primary pattern: full vzdump command with all arguments
|
||||||
# Matches both "INFO: starting new backup job: vzdump ..." and
|
# Matches both "INFO: starting new backup job: vzdump ..." and
|
||||||
@@ -887,49 +891,45 @@ class JournalWatcher:
|
|||||||
}, entity='cluster', entity_id=node_name)
|
}, entity='cluster', entity_id=node_name)
|
||||||
|
|
||||||
def _check_system_shutdown(self, msg: str, syslog_id: str):
|
def _check_system_shutdown(self, msg: str, syslog_id: str):
|
||||||
"""Detect system shutdown/reboot.
|
"""Detect full-node shutdown or reboot.
|
||||||
|
|
||||||
Matches multiple systemd signals that indicate the node is going down:
|
ONLY matches definitive signals from PID 1 (systemd) that prove
|
||||||
- "Shutting down." (systemd PID 1)
|
the entire node is going down -- NOT individual service restarts.
|
||||||
- "System is powering off." / "System is rebooting."
|
|
||||||
- "Reached target Shutdown." / "Reached target Reboot."
|
Severity is INFO, not CRITICAL, because:
|
||||||
- "Journal stopped" (very late in shutdown)
|
- A planned shutdown/reboot is an administrative action, not an emergency.
|
||||||
- "The system will reboot now!" / "The system will power off now!"
|
- If the node truly crashes, the monitor dies before it can send anything.
|
||||||
|
- Proxmox itself treats these as informational notifications.
|
||||||
"""
|
"""
|
||||||
msg_lower = msg.lower()
|
# Strict syslog_id filter: only systemd PID 1 and systemd-logind
|
||||||
|
# emit authoritative node-level shutdown messages.
|
||||||
|
if syslog_id not in ('systemd', 'systemd-logind'):
|
||||||
|
return
|
||||||
|
|
||||||
# Only process systemd / logind messages
|
msg_lower = msg.lower()
|
||||||
if not any(s in syslog_id for s in ('systemd', 'logind', '')):
|
|
||||||
if 'systemd' not in msg_lower:
|
|
||||||
return
|
|
||||||
|
|
||||||
is_reboot = False
|
is_reboot = False
|
||||||
is_shutdown = False
|
is_shutdown = False
|
||||||
|
|
||||||
# Detect reboot signals
|
# Reboot signals -- only definitive whole-system messages
|
||||||
reboot_signals = [
|
reboot_signals = [
|
||||||
'system is rebooting',
|
'system is rebooting',
|
||||||
'reached target reboot',
|
|
||||||
'the system will reboot now',
|
'the system will reboot now',
|
||||||
'starting reboot',
|
|
||||||
]
|
]
|
||||||
for sig in reboot_signals:
|
for sig in reboot_signals:
|
||||||
if sig in msg_lower:
|
if sig in msg_lower:
|
||||||
is_reboot = True
|
is_reboot = True
|
||||||
break
|
break
|
||||||
|
|
||||||
# Detect shutdown/poweroff signals
|
# Shutdown/poweroff signals -- only definitive whole-system messages.
|
||||||
|
# "shutting down" is deliberately EXCLUDED because many services emit
|
||||||
|
# it during normal restarts (e.g. "Shutting down proxy server...").
|
||||||
|
# "journal stopped" is EXCLUDED because journald can restart independently.
|
||||||
if not is_reboot:
|
if not is_reboot:
|
||||||
shutdown_signals = [
|
shutdown_signals = [
|
||||||
'system is powering off',
|
'system is powering off',
|
||||||
'system is halting',
|
'system is halting',
|
||||||
'shutting down',
|
|
||||||
'reached target shutdown',
|
|
||||||
'reached target halt',
|
|
||||||
'the system will power off now',
|
'the system will power off now',
|
||||||
'starting power-off',
|
|
||||||
'journal stopped',
|
|
||||||
'stopping journal service',
|
|
||||||
]
|
]
|
||||||
for sig in shutdown_signals:
|
for sig in shutdown_signals:
|
||||||
if sig in msg_lower:
|
if sig in msg_lower:
|
||||||
@@ -937,13 +937,13 @@ class JournalWatcher:
|
|||||||
break
|
break
|
||||||
|
|
||||||
if is_reboot:
|
if is_reboot:
|
||||||
self._emit('system_reboot', 'CRITICAL', {
|
self._emit('system_reboot', 'INFO', {
|
||||||
'reason': msg[:200],
|
'reason': 'The system is rebooting.',
|
||||||
'hostname': self._hostname,
|
'hostname': self._hostname,
|
||||||
}, entity='node', entity_id='')
|
}, entity='node', entity_id='')
|
||||||
elif is_shutdown:
|
elif is_shutdown:
|
||||||
self._emit('system_shutdown', 'CRITICAL', {
|
self._emit('system_shutdown', 'INFO', {
|
||||||
'reason': msg[:200],
|
'reason': 'The system is shutting down.',
|
||||||
'hostname': self._hostname,
|
'hostname': self._hostname,
|
||||||
}, entity='node', entity_id='')
|
}, entity='node', entity_id='')
|
||||||
|
|
||||||
@@ -1832,11 +1832,36 @@ class ProxmoxHookWatcher:
|
|||||||
'hostname': pve_hostname,
|
'hostname': pve_hostname,
|
||||||
'pve_type': pve_type,
|
'pve_type': pve_type,
|
||||||
'pve_message': message,
|
'pve_message': message,
|
||||||
'pve_title': title,
|
'pve_title': title or event_type,
|
||||||
'title': title,
|
'title': title or event_type,
|
||||||
'job_id': pve_job_id,
|
'job_id': pve_job_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ── Extract clean reason for system-mail events ──
|
||||||
|
# smartd and other system mail contains verbose boilerplate.
|
||||||
|
# Extract just the actionable warning/error lines.
|
||||||
|
if pve_type == 'system-mail' and message:
|
||||||
|
clean_lines = []
|
||||||
|
for line in message.split('\n'):
|
||||||
|
stripped = line.strip()
|
||||||
|
# Skip boilerplate lines
|
||||||
|
if not stripped:
|
||||||
|
continue
|
||||||
|
if stripped.startswith('This message was generated'):
|
||||||
|
continue
|
||||||
|
if stripped.startswith('For details see'):
|
||||||
|
continue
|
||||||
|
if stripped.startswith('You can also use'):
|
||||||
|
continue
|
||||||
|
if stripped.startswith('The original message'):
|
||||||
|
continue
|
||||||
|
if stripped.startswith('Another message will'):
|
||||||
|
continue
|
||||||
|
if stripped.startswith('host name:') or stripped.startswith('DNS domain:'):
|
||||||
|
continue
|
||||||
|
clean_lines.append(stripped)
|
||||||
|
data['reason'] = '\n'.join(clean_lines).strip() if clean_lines else message.strip()[:500]
|
||||||
|
|
||||||
# Extract VMID and VM name from message for vzdump events
|
# Extract VMID and VM name from message for vzdump events
|
||||||
if pve_type == 'vzdump' and message:
|
if pve_type == 'vzdump' and message:
|
||||||
# PVE vzdump messages contain lines like:
|
# PVE vzdump messages contain lines like:
|
||||||
@@ -1902,8 +1927,27 @@ class ProxmoxHookWatcher:
|
|||||||
if pve_type == 'package-updates':
|
if pve_type == 'package-updates':
|
||||||
return 'update_available', 'node', ''
|
return 'update_available', 'node', ''
|
||||||
|
|
||||||
if pve_type == 'system-mail':
|
if pve_type == 'system-mail':
|
||||||
return 'system_mail', 'node', ''
|
# Parse smartd messages to extract useful info and filter noise.
|
||||||
|
# smartd sends system-mail when it detects SMART issues.
|
||||||
|
msg_lower = (message or '').lower()
|
||||||
|
title_lower_sm = (title or '').lower()
|
||||||
|
|
||||||
|
# ── Filter smartd noise ──
|
||||||
|
# FailedReadSmartErrorLog: smartd can't read the error log -- this is
|
||||||
|
# a firmware quirk on some WD/Seagate drives, NOT a disk failure.
|
||||||
|
# FailedReadSmartData: similar firmware issue.
|
||||||
|
# These should NOT generate notifications.
|
||||||
|
smartd_noise = [
|
||||||
|
'failedreadsmarterrorlog',
|
||||||
|
'failedreadsmartdata',
|
||||||
|
'failedopendevice', # drive was temporarily unavailable
|
||||||
|
]
|
||||||
|
for noise in smartd_noise:
|
||||||
|
if noise in title_lower_sm or noise in msg_lower:
|
||||||
|
return '_skip', '', ''
|
||||||
|
|
||||||
|
return 'system_mail', 'node', ''
|
||||||
|
|
||||||
# ── Fallback for unknown/empty pve_type ──
|
# ── Fallback for unknown/empty pve_type ──
|
||||||
# (e.g. test notifications, future PVE event types)
|
# (e.g. test notifications, future PVE event types)
|
||||||
|
|||||||
@@ -561,13 +561,13 @@ TEMPLATES = {
|
|||||||
# ── System events ──
|
# ── System events ──
|
||||||
'system_shutdown': {
|
'system_shutdown': {
|
||||||
'title': '{hostname}: System shutting down',
|
'title': '{hostname}: System shutting down',
|
||||||
'body': 'The system is shutting down.\n{reason}',
|
'body': '{reason}',
|
||||||
'group': 'system',
|
'group': 'system',
|
||||||
'default_enabled': True,
|
'default_enabled': True,
|
||||||
},
|
},
|
||||||
'system_reboot': {
|
'system_reboot': {
|
||||||
'title': '{hostname}: System rebooting',
|
'title': '{hostname}: System rebooting',
|
||||||
'body': 'The system is rebooting.\n{reason}',
|
'body': '{reason}',
|
||||||
'group': 'system',
|
'group': 'system',
|
||||||
'default_enabled': True,
|
'default_enabled': True,
|
||||||
},
|
},
|
||||||
@@ -583,6 +583,12 @@ TEMPLATES = {
|
|||||||
'group': 'system',
|
'group': 'system',
|
||||||
'default_enabled': True,
|
'default_enabled': True,
|
||||||
},
|
},
|
||||||
|
'system_mail': {
|
||||||
|
'title': '{hostname}: {pve_title}',
|
||||||
|
'body': '{reason}',
|
||||||
|
'group': 'system',
|
||||||
|
'default_enabled': True,
|
||||||
|
},
|
||||||
'update_available': {
|
'update_available': {
|
||||||
'title': '{hostname}: Updates available',
|
'title': '{hostname}: Updates available',
|
||||||
'body': 'Total updates: {total_count}\nSecurity: {security_count}\nProxmox: {pve_count}\nKernel: {kernel_count}\nImportant: {important_list}',
|
'body': 'Total updates: {total_count}\nSecurity: {security_count}\nProxmox: {pve_count}\nKernel: {kernel_count}\nImportant: {important_list}',
|
||||||
|
|||||||
Reference in New Issue
Block a user