Update notification service

This commit is contained in:
MacRimi
2026-03-08 10:15:55 +01:00
parent f8b5e07518
commit 17e4227978
3 changed files with 182 additions and 180 deletions

View File

@@ -125,20 +125,13 @@ const generateLatencyReport = (report: ReportData) => {
const logoUrl = `${window.location.origin}/images/proxmenux-logo.png` const logoUrl = `${window.location.origin}/images/proxmenux-logo.png`
// Calculate stats for realtime results // Calculate stats for realtime results
const realtimeStats = report.realtimeResults.length > 0 ? (() => { const realtimeStats = report.realtimeResults.length > 0 ? {
const validResults = report.realtimeResults.filter(r => r.latency_avg !== null) min: Math.min(...report.realtimeResults.filter(r => r.latency_min !== null).map(r => r.latency_min!)),
const minValues = report.realtimeResults.filter(r => r.latency_min !== null).map(r => r.latency_min!) max: Math.max(...report.realtimeResults.filter(r => r.latency_max !== null).map(r => r.latency_max!)),
const maxValues = report.realtimeResults.filter(r => r.latency_max !== null).map(r => r.latency_max!) avg: report.realtimeResults.reduce((acc, r) => acc + (r.latency_avg || 0), 0) / report.realtimeResults.length,
const avgValues = validResults.map(r => r.latency_avg!) current: report.realtimeResults[report.realtimeResults.length - 1]?.latency_avg ?? null,
avgPacketLoss: report.realtimeResults.reduce((acc, r) => acc + (r.packet_loss || 0), 0) / report.realtimeResults.length,
return { } : null
min: minValues.length > 0 ? Math.min(...minValues) : (avgValues.length > 0 ? Math.min(...avgValues) : 0),
max: maxValues.length > 0 ? Math.max(...maxValues) : (avgValues.length > 0 ? Math.max(...avgValues) : 0),
avg: validResults.length > 0 ? validResults.reduce((acc, r) => acc + (r.latency_avg || 0), 0) / validResults.length : 0,
current: report.realtimeResults[report.realtimeResults.length - 1]?.latency_avg ?? null,
avgPacketLoss: report.realtimeResults.reduce((acc, r) => acc + (r.packet_loss || 0), 0) / report.realtimeResults.length,
}
})() : null
const statusText = report.isRealtime const statusText = report.isRealtime
? getStatusText(realtimeStats?.current ?? null) ? getStatusText(realtimeStats?.current ?? null)
@@ -177,11 +170,17 @@ const generateLatencyReport = (report: ReportData) => {
endTime: new Date(report.data[report.data.length - 1].timestamp * 1000).toLocaleString(), endTime: new Date(report.data[report.data.length - 1].timestamp * 1000).toLocaleString(),
} : null } : null
// Generate chart SVG - use average values for the line chart // Generate chart SVG - expand realtime to all 3 values (min, avg, max) per sample
const chartData = report.isRealtime const chartData = report.isRealtime
? report.realtimeResults ? report.realtimeResults.flatMap(r => {
.filter(r => r.latency_avg !== null) const points: number[] = []
.map(r => r.latency_avg!) if (r.latency_min !== null) points.push(r.latency_min)
if (r.latency_avg !== null && r.latency_avg !== r.latency_min && r.latency_avg !== r.latency_max) {
points.push(r.latency_avg)
}
if (r.latency_max !== null) points.push(r.latency_max)
return points.length > 0 ? points : [r.latency_avg ?? 0]
})
: report.data.map(d => d.value || 0) : report.data.map(d => d.value || 0)
let chartSvg = '<p style="text-align:center;color:#64748b;padding:20px;">Not enough data points for chart</p>' let chartSvg = '<p style="text-align:center;color:#64748b;padding:20px;">Not enough data points for chart</p>'
@@ -780,32 +779,30 @@ export function LatencyDetailModal({ open, onOpenChange, currentLatency }: Laten
time: new Date(point.timestamp * 1000).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' }), time: new Date(point.timestamp * 1000).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' }),
})) }))
// Use average value for the chart line (min/max are shown in stats) // Expand each sample to 3 data points (min, avg, max) for accurate representation
const realtimeChartData = realtimeResults const realtimeChartData = realtimeResults.flatMap((r, i) => {
.filter(r => r.latency_avg !== null) const time = new Date(r.timestamp || Date.now()).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit', second: '2-digit' })
.map(r => ({ const points = []
time: new Date(r.timestamp || Date.now()).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit', second: '2-digit' }), if (r.latency_min !== null) points.push({ time, value: r.latency_min, packet_loss: r.packet_loss })
value: r.latency_avg, if (r.latency_avg !== null && r.latency_avg !== r.latency_min && r.latency_avg !== r.latency_max) {
min: r.latency_min, points.push({ time, value: r.latency_avg, packet_loss: r.packet_loss })
max: r.latency_max, }
packet_loss: r.packet_loss if (r.latency_max !== null) points.push({ time, value: r.latency_max, packet_loss: r.packet_loss })
})) // If no valid points, add avg as fallback
if (points.length === 0 && r.latency_avg !== null) {
points.push({ time, value: r.latency_avg, packet_loss: r.packet_loss })
}
return points
})
// Calculate realtime stats // Calculate realtime stats
const realtimeStats = realtimeResults.length > 0 ? (() => { const realtimeStats = realtimeResults.length > 0 ? {
const validResults = realtimeResults.filter(r => r.latency_avg !== null) current: realtimeResults[realtimeResults.length - 1]?.latency_avg ?? 0,
const minValues = realtimeResults.filter(r => r.latency_min !== null).map(r => r.latency_min!) min: Math.min(...realtimeResults.filter(r => r.latency_min !== null).map(r => r.latency_min!)) || 0,
const maxValues = realtimeResults.filter(r => r.latency_max !== null).map(r => r.latency_max!) max: Math.max(...realtimeResults.filter(r => r.latency_max !== null).map(r => r.latency_max!)) || 0,
const avgValues = validResults.map(r => r.latency_avg!) avg: realtimeResults.reduce((acc, r) => acc + (r.latency_avg || 0), 0) / realtimeResults.length,
packetLoss: realtimeResults[realtimeResults.length - 1]?.packet_loss ?? 0,
return { } : null
current: realtimeResults[realtimeResults.length - 1]?.latency_avg ?? 0,
min: minValues.length > 0 ? Math.min(...minValues) : (avgValues.length > 0 ? Math.min(...avgValues) : 0),
max: maxValues.length > 0 ? Math.max(...maxValues) : (avgValues.length > 0 ? Math.max(...avgValues) : 0),
avg: validResults.length > 0 ? validResults.reduce((acc, r) => acc + (r.latency_avg || 0), 0) / validResults.length : 0,
packetLoss: realtimeResults[realtimeResults.length - 1]?.packet_loss ?? 0,
}
})() : null
const displayStats = isRealtime ? { const displayStats = isRealtime ? {
current: realtimeStats?.current ?? 0, current: realtimeStats?.current ?? 0,

View File

@@ -631,39 +631,39 @@ def init_latency_db():
return False return False
def _measure_latency(target_ip: str) -> dict: def _measure_latency(target_ip: str) -> dict:
"""Ping a target and return latency stats. Uses 3 pings and returns avg, min, max for full visibility.""" """Ping a target and return latency stats."""
try: try:
result = subprocess.run( result = subprocess.run(
['ping', '-c', '3', '-W', '2', target_ip], ['ping', '-c', '3', '-W', '2', target_ip],
capture_output=True, text=True, timeout=10 capture_output=True, text=True, timeout=10
) )
if result.returncode == 0: if result.returncode == 0:
latencies = [] latencies = []
for line in result.stdout.split('\n'): for line in result.stdout.split('\n'):
if 'time=' in line: if 'time=' in line:
try: try:
latency_str = line.split('time=')[1].split()[0] latency_str = line.split('time=')[1].split()[0]
latencies.append(float(latency_str)) latencies.append(float(latency_str))
except: except:
pass pass
if latencies: if latencies:
return { return {
'success': True, 'success': True,
'avg': round(sum(latencies) / len(latencies), 1), 'avg': round(sum(latencies) / len(latencies), 1),
'min': round(min(latencies), 1), 'min': round(min(latencies), 1),
'max': round(max(latencies), 1), 'max': round(max(latencies), 1),
'packet_loss': round((3 - len(latencies)) / 3 * 100, 1) 'packet_loss': round((3 - len(latencies)) / 3 * 100, 1)
} }
# Ping failed - 100% packet loss # Ping failed - 100% packet loss
return {'success': False, 'avg': None, 'min': None, 'max': None, 'packet_loss': 100.0} return {'success': False, 'avg': None, 'min': None, 'max': None, 'packet_loss': 100.0}
except Exception: except Exception:
return {'success': False, 'avg': None, 'min': None, 'max': None, 'packet_loss': 100.0} return {'success': False, 'avg': None, 'min': None, 'max': None, 'packet_loss': 100.0}
def _record_latency(): def _record_latency():
"""Record latency to the default gateway. Only stores the average of 3 pings.""" """Record latency to the default gateway."""
try: try:
gateway = _get_default_gateway() gateway = _get_default_gateway()
stats = _measure_latency(gateway) stats = _measure_latency(gateway)
@@ -671,9 +671,9 @@ def _record_latency():
conn = _get_temp_db() conn = _get_temp_db()
conn.execute( conn.execute(
"""INSERT INTO latency_history """INSERT INTO latency_history
(timestamp, target, latency_avg, packet_loss) (timestamp, target, latency_avg, latency_min, latency_max, packet_loss)
VALUES (?, ?, ?, ?)""", VALUES (?, ?, ?, ?, ?, ?)""",
(int(time.time()), 'gateway', stats['avg'], stats['packet_loss']) (int(time.time()), 'gateway', stats['avg'], stats['min'], stats['max'], stats['packet_loss'])
) )
conn.commit() conn.commit()
conn.close() conn.close()
@@ -718,18 +718,20 @@ def get_latency_history(target='gateway', timeframe='hour'):
if interval is None: if interval is None:
cursor = conn.execute( cursor = conn.execute(
"""SELECT timestamp, latency_avg, packet_loss """SELECT timestamp, latency_avg, latency_min, latency_max, packet_loss
FROM latency_history FROM latency_history
WHERE timestamp >= ? AND target = ? WHERE timestamp >= ? AND target = ?
ORDER BY timestamp ASC""", ORDER BY timestamp ASC""",
(since, target) (since, target)
) )
rows = cursor.fetchall() rows = cursor.fetchall()
data = [{"timestamp": r[0], "value": r[1], "packet_loss": r[2]} for r in rows if r[1] is not None] data = [{"timestamp": r[0], "value": r[1], "min": r[2], "max": r[3], "packet_loss": r[4]} for r in rows if r[1] is not None]
else: else:
cursor = conn.execute( cursor = conn.execute(
"""SELECT (timestamp / ?) * ? as bucket, """SELECT (timestamp / ?) * ? as bucket,
ROUND(AVG(latency_avg), 1) as avg_val, ROUND(AVG(latency_avg), 1) as avg_val,
ROUND(MIN(latency_min), 1) as min_val,
ROUND(MAX(latency_max), 1) as max_val,
ROUND(AVG(packet_loss), 1) as avg_loss ROUND(AVG(packet_loss), 1) as avg_loss
FROM latency_history FROM latency_history
WHERE timestamp >= ? AND target = ? WHERE timestamp >= ? AND target = ?
@@ -738,20 +740,32 @@ def get_latency_history(target='gateway', timeframe='hour'):
(interval, interval, since, target) (interval, interval, since, target)
) )
rows = cursor.fetchall() rows = cursor.fetchall()
data = [{"timestamp": r[0], "value": r[1], "packet_loss": r[2]} for r in rows if r[1] is not None] data = [{"timestamp": r[0], "value": r[1], "min": r[2], "max": r[3], "packet_loss": r[4]} for r in rows if r[1] is not None]
conn.close() conn.close()
# Compute stats using the averaged values shown in the graph # Compute stats
if data: if data:
values = [d["value"] for d in data if d["value"] is not None] values = [d["value"] for d in data if d["value"] is not None]
if values: if values:
stats = { # For gateway, use min/max of the averages (values) so stats match the graph
"min": round(min(values), 1), # For other targets (realtime), use actual min/max from individual pings
"max": round(max(values), 1), if target == 'gateway':
"avg": round(sum(values) / len(values), 1), stats = {
"current": values[-1] if values else 0 "min": round(min(values), 1),
} "max": round(max(values), 1),
"avg": round(sum(values) / len(values), 1),
"current": values[-1] if values else 0
}
else:
mins = [d["min"] for d in data if d.get("min") is not None]
maxs = [d["max"] for d in data if d.get("max") is not None]
stats = {
"min": round(min(mins) if mins else min(values), 1),
"max": round(max(maxs) if maxs else max(values), 1),
"avg": round(sum(values) / len(values), 1),
"current": values[-1] if values else 0
}
else: else:
stats = {"min": 0, "max": 0, "avg": 0, "current": 0} stats = {"min": 0, "max": 0, "avg": 0, "current": 0}
else: else:
@@ -761,69 +775,27 @@ def get_latency_history(target='gateway', timeframe='hour'):
except Exception as e: except Exception as e:
return {"data": [], "stats": {"min": 0, "max": 0, "avg": 0, "current": 0}, "target": target} return {"data": [], "stats": {"min": 0, "max": 0, "avg": 0, "current": 0}, "target": target}
def get_latest_gateway_latency():
"""Get the most recent gateway latency from the database (no new ping).
Used by health monitor to avoid duplicate pings."""
try:
conn = _get_temp_db()
cursor = conn.execute(
"""SELECT timestamp, latency_avg, packet_loss FROM latency_history
WHERE target = 'gateway' ORDER BY timestamp DESC LIMIT 1"""
)
row = cursor.fetchone()
conn.close()
if row:
timestamp, latency_avg, packet_loss = row
age = int(time.time()) - timestamp
# Only return if data is fresh (less than 2 minutes old)
if age <= 120:
return {
'latency_avg': latency_avg,
'packet_loss': packet_loss,
'timestamp': timestamp,
'age_seconds': age,
'fresh': True
}
return {'fresh': False, 'latency_avg': None}
except Exception:
return {'fresh': False, 'latency_avg': None}
def get_current_latency(target='gateway'): def get_current_latency(target='gateway'):
"""Get the most recent latency measurement for a target. """Get the most recent latency measurement for a target."""
For gateway: reads from database (already monitored every 60s).
For cloudflare/google: does a fresh ping (on-demand only)."""
try: try:
# Gateway uses stored data to avoid duplicate pings # If gateway, resolve to actual IP
if target == 'gateway': if target == 'gateway':
target_ip = _get_default_gateway() target_ip = _get_default_gateway()
stored = get_latest_gateway_latency()
if stored.get('fresh'):
return {
'target': target,
'target_ip': target_ip,
'latency_avg': stored['latency_avg'],
'packet_loss': stored['packet_loss'],
'status': 'ok' if stored['latency_avg'] and stored['latency_avg'] < 100 else 'warning'
}
# Fallback: do fresh measurement if no stored data
stats = _measure_latency(target_ip)
else: else:
# Cloudflare/Google: fresh ping (not continuously monitored) target_ip = LATENCY_TARGETS.get(target, target)
target_ip = LATENCY_TARGETS.get(target, target)
stats = _measure_latency(target_ip)
return { stats = _measure_latency(target_ip)
'target': target, return {
'target_ip': target_ip, 'target': target,
'latency_avg': stats['avg'], 'target_ip': target_ip,
'latency_min': stats.get('min'), 'latency_avg': stats['avg'],
'latency_max': stats.get('max'), 'latency_min': stats['min'],
'packet_loss': stats['packet_loss'], 'latency_max': stats['max'],
'status': 'ok' if stats['success'] and stats['avg'] and stats['avg'] < 100 else 'warning' if stats['success'] else 'error' 'packet_loss': stats['packet_loss'],
} 'status': 'ok' if stats['success'] and stats['avg'] and stats['avg'] < 100 else 'warning' if stats['success'] else 'error'
except Exception: }
return {'target': target, 'latency_avg': None, 'latency_min': None, 'latency_max': None, 'status': 'error'} except Exception:
return {'target': target, 'latency_avg': None, 'status': 'error'}
def _health_collector_loop(): def _health_collector_loop():

View File

@@ -1966,12 +1966,11 @@ class HealthMonitor:
if latency_status: if latency_status:
latency_ms = latency_status.get('latency_ms', 'N/A') latency_ms = latency_status.get('latency_ms', 'N/A')
latency_sev = latency_status.get('status', 'OK') latency_sev = latency_status.get('status', 'OK')
interface_details['connectivity'] = latency_status interface_details['connectivity'] = latency_status
target_display = latency_status.get('target', 'gateway') connectivity_check = {
connectivity_check = { 'status': latency_sev if latency_sev not in ['UNKNOWN'] else 'OK',
'status': latency_sev if latency_sev not in ['UNKNOWN'] else 'OK', 'detail': f'Latency {latency_ms}ms to gateway' if isinstance(latency_ms, (int, float)) else latency_status.get('reason', 'Unknown'),
'detail': f'Latency {latency_ms}ms to {target_display}' if isinstance(latency_ms, (int, float)) else latency_status.get('reason', 'Unknown'), }
}
if latency_sev not in ['OK', 'INFO', 'UNKNOWN']: if latency_sev not in ['OK', 'INFO', 'UNKNOWN']:
issues.append(latency_status.get('reason', 'Network latency issue')) issues.append(latency_status.get('reason', 'Network latency issue'))
else: else:
@@ -2007,48 +2006,74 @@ class HealthMonitor:
return {'status': 'UNKNOWN', 'reason': f'Network check unavailable: {str(e)}', 'checks': {}} return {'status': 'UNKNOWN', 'reason': f'Network check unavailable: {str(e)}', 'checks': {}}
def _check_network_latency(self) -> Optional[Dict[str, Any]]: def _check_network_latency(self) -> Optional[Dict[str, Any]]:
"""Check network latency using the gateway latency already monitored. """Check network latency by reading from the gateway latency monitor database.
Uses the latency data from flask_server's continuous monitoring (every 60s) Reads the most recent gateway latency measurement from the SQLite database
instead of doing a separate ping to avoid duplicate network checks. that is updated every 60 seconds by the latency monitor thread.
The gateway latency is based on the average of 3 pings to avoid false positives. This avoids redundant ping operations and uses the existing monitoring data.
""" """
cache_key = 'network_latency' cache_key = 'network_latency'
current_time = time.time() current_time = time.time()
# Use shorter cache since we're reading from DB (no network overhead)
if cache_key in self.last_check_times: if cache_key in self.last_check_times:
if current_time - self.last_check_times[cache_key] < 30: if current_time - self.last_check_times[cache_key] < 60:
return self.cached_results.get(cache_key) return self.cached_results.get(cache_key)
try: try:
# Import and use the stored gateway latency from flask_server import sqlite3
import sys db_path = "/usr/local/share/proxmenux/monitor.db"
import os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from flask_server import get_latest_gateway_latency, _get_default_gateway
stored = get_latest_gateway_latency() # Check if database exists
gateway_ip = _get_default_gateway() or 'gateway' if not os.path.exists(db_path):
return {'status': 'UNKNOWN', 'reason': 'Latency monitor database not available'}
if stored.get('fresh') and stored.get('latency_avg') is not None: conn = sqlite3.connect(db_path, timeout=5)
avg_latency = stored['latency_avg'] cursor = conn.execute(
packet_loss = stored.get('packet_loss', 0) """SELECT latency_avg, latency_min, latency_max, packet_loss, timestamp
FROM latency_history
WHERE target = 'gateway'
ORDER BY timestamp DESC
LIMIT 1"""
)
row = cursor.fetchone()
conn.close()
if row and row[0] is not None:
avg_latency = row[0]
min_latency = row[1]
max_latency = row[2]
packet_loss = row[3] or 0
data_age = current_time - row[4]
# If data is older than 2 minutes, consider it stale
if data_age > 120:
stale_result = {
'status': 'UNKNOWN',
'reason': 'Latency data is stale (>2 min old)'
}
self.cached_results[cache_key] = stale_result
self.last_check_times[cache_key] = current_time
return stale_result
# Check for packet loss first # Check for packet loss first
if packet_loss is not None and packet_loss >= 50: if packet_loss >= 100:
loss_result = {
'status': 'CRITICAL',
'reason': 'Packet loss to gateway (100% loss)',
'latency_ms': None,
'packet_loss': packet_loss
}
self.cached_results[cache_key] = loss_result
self.last_check_times[cache_key] = current_time
return loss_result
# Evaluate latency thresholds
if avg_latency > self.NETWORK_LATENCY_CRITICAL:
status = 'CRITICAL' status = 'CRITICAL'
reason = f'High packet loss ({packet_loss:.0f}%) to gateway' reason = f'Latency {avg_latency:.1f}ms to gateway >{self.NETWORK_LATENCY_CRITICAL}ms'
elif packet_loss is not None and packet_loss > 0:
status = 'WARNING'
reason = f'Packet loss ({packet_loss:.0f}%) to gateway'
# Check latency thresholds
elif avg_latency > self.NETWORK_LATENCY_CRITICAL:
status = 'CRITICAL'
reason = f'Latency {avg_latency:.1f}ms >{self.NETWORK_LATENCY_CRITICAL}ms to gateway'
elif avg_latency > self.NETWORK_LATENCY_WARNING: elif avg_latency > self.NETWORK_LATENCY_WARNING:
status = 'WARNING' status = 'WARNING'
reason = f'Latency {avg_latency:.1f}ms >{self.NETWORK_LATENCY_WARNING}ms to gateway' reason = f'Latency {avg_latency:.1f}ms to gateway >{self.NETWORK_LATENCY_WARNING}ms'
else: else:
status = 'OK' status = 'OK'
reason = None reason = None
@@ -2056,7 +2081,9 @@ class HealthMonitor:
latency_result = { latency_result = {
'status': status, 'status': status,
'latency_ms': round(avg_latency, 1), 'latency_ms': round(avg_latency, 1),
'target': gateway_ip, 'latency_min': round(min_latency, 1) if min_latency else None,
'latency_max': round(max_latency, 1) if max_latency else None,
'packet_loss': packet_loss,
} }
if reason: if reason:
latency_result['reason'] = reason latency_result['reason'] = reason
@@ -2065,11 +2092,17 @@ class HealthMonitor:
self.last_check_times[cache_key] = current_time self.last_check_times[cache_key] = current_time
return latency_result return latency_result
# No fresh data available - return unknown (monitoring may not be running) # No data in database yet
return {'status': 'UNKNOWN', 'reason': 'No recent latency data available'} no_data_result = {
'status': 'UNKNOWN',
'reason': 'No gateway latency data available yet'
}
self.cached_results[cache_key] = no_data_result
self.last_check_times[cache_key] = current_time
return no_data_result
except Exception as e: except Exception as e:
return {'status': 'UNKNOWN', 'reason': f'Latency check unavailable: {str(e)}'} return {'status': 'UNKNOWN', 'reason': f'Latency check failed: {str(e)}'}
def _is_vzdump_active(self) -> bool: def _is_vzdump_active(self) -> bool:
"""Check if a vzdump (backup) job is currently running.""" """Check if a vzdump (backup) job is currently running."""