mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-02-18 16:36:27 +00:00
270 lines
10 KiB
Python
270 lines
10 KiB
Python
"""
Flask routes for health monitoring with persistence support.
"""
|
|
|
|
from flask import Blueprint, jsonify, request
|
|
from health_monitor import health_monitor
|
|
from health_persistence import health_persistence
|
|
|
|
# Blueprint on which every health-monitoring route in this module is registered.
health_bp = Blueprint('health', __name__)
|
|
|
|
@health_bp.route('/api/health/status', methods=['GET'])
def get_health_status():
    """Return the monitor's overall health summary as JSON.

    The body is whatever ``health_monitor.get_overall_status()`` produces.
    Any exception raised while building the response is reported as a 500
    whose body is ``{'error': <message>}``.
    """
    try:
        return jsonify(health_monitor.get_overall_status())
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
|
|
|
|
@health_bp.route('/api/health/details', methods=['GET'])
def get_health_details():
    """Return the detailed health report (all checks) as JSON.

    The body is whatever ``health_monitor.get_detailed_status()`` produces.
    Any exception raised while building the response is reported as a 500
    whose body is ``{'error': <message>}``.
    """
    try:
        return jsonify(health_monitor.get_detailed_status())
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
|
|
|
|
@health_bp.route('/api/system-info', methods=['GET'])
def get_system_info():
    """Return lightweight system info for the header display.

    The payload comes from ``health_monitor.get_system_info()``. When it
    contains a ``health`` entry, that entry's ``status`` is rewritten in
    place from the monitor's vocabulary (OK / WARNING / CRITICAL / UNKNOWN)
    to the values sent to the client (healthy / warning / critical).

    Any exception is reported as a 500 with ``{'error': <message>}``.
    """
    try:
        payload = health_monitor.get_system_info()

        # Nothing to translate when the monitor supplied no health section.
        if 'health' not in payload:
            return jsonify(payload)

        client_status = {
            'OK': 'healthy',
            'WARNING': 'warning',
            'CRITICAL': 'critical',
            'UNKNOWN': 'warning',
        }
        health_section = payload['health']
        # A missing status counts as OK; the lookup is case-insensitive and
        # any value outside the table falls back to 'healthy'.
        monitor_status = health_section.get('status', 'OK').upper()
        health_section['status'] = client_status.get(monitor_status, 'healthy')

        return jsonify(payload)
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
|
|
|
|
def _format_suppression_label(hours):
    """Turn a suppression period given in hours into a short display label.

    ``-1`` yields ``'permanently'``. Otherwise the largest unit that fits is
    used: years (8760 h), months (720 h), weeks (168 h), days (from 72 h
    upward), and plain hours below that.
    """
    if hours == -1:
        return 'permanently'
    if hours >= 8760:
        return f'{hours // 8760} year(s)'
    if hours >= 720:
        return f'{hours // 720} month(s)'
    if hours >= 168:
        return f'{hours // 168} week(s)'
    if hours >= 72:
        return f'{hours // 24} day(s)'
    return f'{hours} hours'


def _invalidate_health_caches(category):
    """Drop cached check results so the next fetch reflects a dismissal.

    Clears the cache entry belonging to ``category`` (when the category is
    known) plus the overall-status entries used for the header.
    """
    cache_key_map = {
        'logs': 'system_logs',
        'pve_services': 'pve_services',
        'updates': 'updates_check',
        'security': 'security_check',
        'temperature': 'cpu_check',
        'network': 'network_check',
        'disks': 'storage_check',
        'vms': 'vms_check',
    }
    cache_key = cache_key_map.get(category)
    if cache_key:
        health_monitor.last_check_times.pop(cache_key, None)
        health_monitor.cached_results.pop(cache_key, None)

    # Also invalidate overall status caches so the header updates immediately.
    # NOTE(review): '_bg_detailed' (read by /api/health/full) is not cleared
    # here -- confirm whether that endpoint should also refresh on dismiss.
    for overall_key in ('_bg_overall', 'overall_health'):
        health_monitor.last_check_times.pop(overall_key, None)
        health_monitor.cached_results.pop(overall_key, None)


@health_bp.route('/api/health/acknowledge', methods=['POST'])
def acknowledge_error():
    """Acknowledge/dismiss an error manually.

    Expects a JSON object containing ``error_key``.

    Responses:
        200 -- the error was dismissed; the body carries the original
               severity, category, suppression period (hours and label) and
               the acknowledgement timestamp reported by the persistence
               layer.
        400 -- the body is missing, is not valid JSON, is not a JSON object,
               or lacks ``error_key``.
        404 -- the persistence layer did not report success.
        500 -- any unexpected exception, as ``{'error': <message>}``.
    """
    try:
        # silent=True makes a missing or malformed JSON body come back as
        # None, so it is answered with the 400 below instead of raising
        # inside this try block and being reported as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'error_key' not in data:
            return jsonify({'error': 'error_key is required'}), 400

        error_key = data['error_key']
        result = health_persistence.acknowledge_error(error_key)

        if not result.get('success'):
            return jsonify({
                'success': False,
                'message': 'Error not found or already dismissed',
                'error_key': error_key
            }), 404

        # Use the error's category to clear the matching cache entries.
        category = result.get('category', '')
        _invalidate_health_caches(category)

        # Use the per-record suppression hours from acknowledge_error().
        sup_hours = result.get('suppression_hours', 24)
        suppression_label = _format_suppression_label(sup_hours)

        # "for permanently" is not grammatical, so the permanent case gets
        # its own wording; suppression_label itself is left unchanged.
        if sup_hours == -1:
            message = 'Error dismissed permanently'
        else:
            message = f'Error dismissed for {suppression_label}'

        return jsonify({
            'success': True,
            'message': message,
            'error_key': error_key,
            'original_severity': result.get('original_severity', 'WARNING'),
            'category': category,
            'suppression_hours': sup_hours,
            'suppression_label': suppression_label,
            'acknowledged_at': result.get('acknowledged_at')
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
@health_bp.route('/api/health/active-errors', methods=['GET'])
def get_active_errors():
    """Return the active persistent errors as ``{'errors': [...]}``.

    The optional ``category`` query parameter is passed straight through to
    ``health_persistence.get_active_errors``; it is ``None`` when absent.
    Any exception is reported as a 500 with ``{'error': <message>}``.
    """
    try:
        category_filter = request.args.get('category')
        return jsonify({
            'errors': health_persistence.get_active_errors(category_filter),
        })
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
|
|
|
|
@health_bp.route('/api/health/dismissed', methods=['GET'])
def get_dismissed_errors():
    """Return dismissed errors that are still inside their suppression period.

    The frontend shows these as INFO items with a 'Dismissed' badge. The
    list comes from ``health_persistence.get_dismissed_errors()`` and is
    wrapped as ``{'dismissed': [...]}``. Any exception is reported as a 500
    with ``{'error': <message>}``.
    """
    try:
        return jsonify({'dismissed': health_persistence.get_dismissed_errors()})
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
|
|
|
|
@health_bp.route('/api/health/full', methods=['GET'])
def get_full_health():
    """Return the complete health picture in a single response.

    Combines the detailed status, the active errors, the dismissed errors
    and the custom suppressions. The detailed status is taken from the
    background cache entry ``_bg_detailed`` when that entry exists and was
    refreshed less than 360 seconds (6 minutes) ago; otherwise a live
    detailed check is run.

    Any exception is reported as a 500 with ``{'error': <message>}``.
    """
    import time as _time

    try:
        cache_key = '_bg_detailed'
        refreshed_at = health_monitor.last_check_times.get(cache_key, 0)
        is_fresh = (_time.time() - refreshed_at) < 360

        if is_fresh and cache_key in health_monitor.cached_results:
            # Serve the background result for an instant response.
            details = health_monitor.cached_results[cache_key]
        else:
            # First load or stale cache: compute the status now.
            details = health_monitor.get_detailed_status()

        return jsonify({
            'health': details,
            'active_errors': health_persistence.get_active_errors(),
            'dismissed': health_persistence.get_dismissed_errors(),
            'custom_suppressions': health_persistence.get_custom_suppressions(),
            'timestamp': details.get('timestamp')
        })
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
|
|
|
|
@health_bp.route('/api/health/pending-notifications', methods=['GET'])
def get_pending_notifications():
    """Return the events that are still waiting to be notified.

    Intended for the future Notification Service (Bloque A) that will
    deliver Telegram/Gotify/Discord alerts. The body is
    ``{'pending': [...], 'count': <number of pending events>}``.
    Any exception is reported as a 500 with ``{'error': <message>}``.
    """
    try:
        queued = health_persistence.get_pending_notifications()
        body = {'pending': queued, 'count': len(queued)}
        return jsonify(body)
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
|
|
|
|
@health_bp.route('/api/health/mark-notified', methods=['POST'])
def mark_events_notified():
    """Mark events as notified after a notification was sent successfully.

    Used by the Notification Service (Bloque A) after sending alerts.
    Expects a JSON object containing ``event_ids`` as an array.

    Responses:
        200 -- ``{'success': True, 'marked_count': <len(event_ids)>}``.
        400 -- the body is missing, is not valid JSON, is not a JSON object,
               lacks ``event_ids``, or ``event_ids`` is not an array.
        500 -- any unexpected exception, as ``{'error': <message>}``.
    """
    try:
        # silent=True makes a missing or malformed JSON body come back as
        # None, so it is answered with the 400 below instead of raising
        # inside this try block and being reported as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'event_ids' not in data:
            return jsonify({'error': 'event_ids array is required'}), 400

        event_ids = data['event_ids']
        if not isinstance(event_ids, list):
            return jsonify({'error': 'event_ids must be an array'}), 400

        health_persistence.mark_events_notified(event_ids)
        return jsonify({'success': True, 'marked_count': len(event_ids)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
@health_bp.route('/api/health/settings', methods=['GET'])
def get_health_settings():
    """Return the per-category suppression duration settings.

    The body is ``{'categories': ...}`` where the value comes from
    ``health_persistence.get_suppression_categories()`` (all health
    categories with their currently configured hours). Any exception is
    reported as a 500 with ``{'error': <message>}``.
    """
    try:
        return jsonify({
            'categories': health_persistence.get_suppression_categories(),
        })
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
|
|
|
|
|
|
@health_bp.route('/api/health/settings', methods=['POST'])
def save_health_settings():
    """Save per-category suppression duration settings.

    Expects a JSON object with key-value pairs like:
    {"suppress_cpu": "168", "suppress_memory": "-1"}
    Valid values: 24, 72, 168, 720, 8760, -1 (permanent), or any positive
    integer for custom.

    Keys that are not in ``health_persistence.CATEGORY_SETTING_MAP`` and
    values that are not -1 or a positive integer are skipped silently; the
    keys actually stored are listed in ``updated``.

    Responses:
        200 -- ``{'success': True, 'updated': [...], 'count': <n>,
               'synced_dismissed': <value from sync_dismissed_suppression()>}``.
        400 -- the body is missing, empty, not valid JSON, or not a JSON
               object.
        500 -- any unexpected exception, as ``{'error': <message>}``.
    """
    try:
        # silent=True makes a missing or malformed JSON body come back as
        # None, so it is answered with the 400 below instead of raising
        # inside this try block and being reported as a 500.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'error': 'No settings provided'}), 400
        if not isinstance(data, dict):
            # A JSON array/string/number has no .items(); reject it as a
            # client error rather than failing with a 500.
            return jsonify({'error': 'Settings must be a JSON object'}), 400

        valid_keys = set(health_persistence.CATEGORY_SETTING_MAP.values())
        updated = []

        for key, value in data.items():
            if key not in valid_keys:
                continue

            try:
                hours = int(value)
                # Validate: must be -1 (permanent) or positive
                if hours != -1 and hours < 1:
                    continue
                health_persistence.set_setting(key, str(hours))
                updated.append(key)
            except (ValueError, TypeError, OverflowError):
                # OverflowError: Python's JSON parser accepts Infinity, and
                # int(inf) raises it; skip that value like any other bad one.
                continue

        # Retroactively sync all existing dismissed errors
        # so changes are effective immediately, not just on next dismiss
        synced_count = health_persistence.sync_dismissed_suppression()

        return jsonify({
            'success': True,
            'updated': updated,
            'count': len(updated),
            'synced_dismissed': synced_count
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|