"""
Health Monitor Persistence Module
Manages persistent error tracking across AppImage updates using SQLite.
Stores errors in /usr/local/share/proxmenux/health_monitor.db
(same directory as monitor.db for temperature history)

Features:
- Persistent error storage (survives AppImage updates)
- Smart error resolution (auto-clear when VM starts, or after 48h)
- Event system for future Telegram notifications
- Manual acknowledgment support

Author: MacRimi
Version: 1.1
"""

import sqlite3
import json
import os
import threading
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional
from pathlib import Path

class HealthPersistence:
    """Manages persistent health error tracking"""
    
    # Error retention periods (seconds)
    VM_ERROR_RETENTION = 48 * 3600  # 48 hours
    LOG_ERROR_RETENTION = 24 * 3600  # 24 hours
    DISK_ERROR_RETENTION = 48 * 3600  # 48 hours
    
    # Default suppression: 24 hours (user can change per-category in settings)
    DEFAULT_SUPPRESSION_HOURS = 24
    
    # Mapping from error categories to settings keys
    CATEGORY_SETTING_MAP = {
        'temperature': 'suppress_cpu',
        'memory': 'suppress_memory',
        'storage': 'suppress_storage',
        'disks': 'suppress_disks',
        'network': 'suppress_network',
        'vms': 'suppress_vms',
        'pve_services': 'suppress_pve_services',
        'logs': 'suppress_logs',
        'updates': 'suppress_updates',
        'security': 'suppress_security',
    }
    
    def __init__(self):
        """Initialize persistence with database in shared ProxMenux data directory"""
        self.data_dir = Path('/usr/local/share/proxmenux')
        self.data_dir.mkdir(parents=True, exist_ok=True)
        
        self.db_path = self.data_dir / 'health_monitor.db'
        self._db_lock = threading.Lock()
        self._init_database()
    
    def _get_conn(self) -> sqlite3.Connection:
        """Get a SQLite connection with timeout and WAL mode for safe concurrency."""
        conn = sqlite3.connect(str(self.db_path), timeout=10)
        conn.execute('PRAGMA journal_mode=WAL')
        conn.execute('PRAGMA busy_timeout=5000')
        return conn
    
    def _init_database(self):
        """Initialize SQLite database with required tables"""
        conn = self._get_conn()
        cursor = conn.cursor()
        
        # Errors table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS errors (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                error_key TEXT UNIQUE NOT NULL,
                category TEXT NOT NULL,
                severity TEXT NOT NULL,
                reason TEXT NOT NULL,
                details TEXT,
                first_seen TEXT NOT NULL,
                last_seen TEXT NOT NULL,
                resolved_at TEXT,
                acknowledged INTEGER DEFAULT 0,
                notification_sent INTEGER DEFAULT 0
            )
        ''')
        
        # Events table (for future Telegram notifications)
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS events (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                event_type TEXT NOT NULL,
                error_key TEXT NOT NULL,
                timestamp TEXT NOT NULL,
                data TEXT
            )
        ''')
        
        # System capabilities table (detected once, cached forever)
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS system_capabilities (
                cap_key TEXT PRIMARY KEY,
                cap_value TEXT NOT NULL,
                detected_at TEXT NOT NULL
            )
        ''')
        
        # User settings table (per-category suppression durations, etc.)
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS user_settings (
                setting_key TEXT PRIMARY KEY,
                setting_value TEXT NOT NULL,
                updated_at TEXT NOT NULL
            )
        ''')
        
        # Notification history table (records all sent notifications)
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS notification_history (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                event_type TEXT NOT NULL,
                channel TEXT NOT NULL,
                title TEXT,
                message TEXT,
                severity TEXT,
                sent_at TEXT NOT NULL,
                success INTEGER DEFAULT 1,
                error_message TEXT,
                source TEXT DEFAULT 'server'
            )
        ''')
        
        # Migration: add suppression_hours column to errors if not present
        cursor.execute("PRAGMA table_info(errors)")
        columns = [col[1] for col in cursor.fetchall()]
        if 'suppression_hours' not in columns:
            cursor.execute('ALTER TABLE errors ADD COLUMN suppression_hours INTEGER DEFAULT 24')
        
        # Indexes for performance
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_error_key ON errors(error_key)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_category ON errors(category)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_resolved ON errors(resolved_at)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_events_error ON events(error_key)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_notif_sent_at ON notification_history(sent_at)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_notif_severity ON notification_history(severity)')
        
        conn.commit()
        conn.close()
    
    def record_error(self, error_key: str, category: str, severity: str, 
                    reason: str, details: Optional[Dict] = None) -> Dict[str, Any]:
        """
        Record or update an error.
        Returns event info (new_error, updated, etc.)
        """
        with self._db_lock:
            return self._record_error_impl(error_key, category, severity, reason, details)
    
    def _record_error_impl(self, error_key, category, severity, reason, details):
        conn = self._get_conn()
        cursor = conn.cursor()
        
        now = datetime.now().isoformat()
        details_json = json.dumps(details) if details else None
        
        cursor.execute('''
            SELECT id, acknowledged, resolved_at, category, severity, first_seen, 
                   notification_sent, suppression_hours
            FROM errors WHERE error_key = ?
        ''', (error_key,))
        existing = cursor.fetchone()
        
        event_info = {'type': 'updated', 'needs_notification': False}
        
        if existing:
            err_id, ack, resolved_at, old_cat, old_severity, first_seen, notif_sent, stored_suppression = existing
            
            if ack == 1:
                # SAFETY OVERRIDE: Critical CPU temperature ALWAYS re-triggers
                # regardless of any dismiss/permanent setting (hardware protection)
                if error_key == 'cpu_temperature' and severity == 'CRITICAL':
                    cursor.execute('DELETE FROM errors WHERE error_key = ?', (error_key,))
                    cursor.execute('''
                        INSERT INTO errors 
                        (error_key, category, severity, reason, details, first_seen, last_seen)
                        VALUES (?, ?, ?, ?, ?, ?, ?)
                    ''', (error_key, category, severity, reason, details_json, now, now))
                    event_info = {'type': 'new', 'needs_notification': True}
                    self._record_event(cursor, 'new', error_key, 
                                      {'severity': severity, 'reason': reason,
                                       'note': 'CRITICAL temperature override - safety alert'})
                    conn.commit()
                    conn.close()
                    return event_info
                
                # Check suppression: use per-record stored hours (set at dismiss time)
                sup_hours = stored_suppression if stored_suppression is not None else self.DEFAULT_SUPPRESSION_HOURS
                
                # Permanent dismiss (sup_hours == -1): always suppress
                if sup_hours == -1:
                    conn.close()
                    return {'type': 'skipped_acknowledged', 'needs_notification': False}
                
                # Time-limited suppression
                still_suppressed = False
                if resolved_at:
                    try:
                        resolved_dt = datetime.fromisoformat(resolved_at)
                        elapsed_hours = (datetime.now() - resolved_dt).total_seconds() / 3600
                        still_suppressed = elapsed_hours < sup_hours
                    except Exception:
                        pass
                
                if still_suppressed:
                    conn.close()
                    return {'type': 'skipped_acknowledged', 'needs_notification': False}
                else:
                    # Suppression expired - reset as a NEW event
                    cursor.execute('DELETE FROM errors WHERE error_key = ?', (error_key,))
                    cursor.execute('''
                        INSERT INTO errors 
                        (error_key, category, severity, reason, details, first_seen, last_seen)
                        VALUES (?, ?, ?, ?, ?, ?, ?)
                    ''', (error_key, category, severity, reason, details_json, now, now))
                    event_info = {'type': 'new', 'needs_notification': True}
                    self._record_event(cursor, 'new', error_key, 
                                      {'severity': severity, 'reason': reason,
                                       'note': 'Re-triggered after suppression expired'})
                    conn.commit()
                    conn.close()
                    return event_info
            
            # Not acknowledged - update existing active error
            cursor.execute('''
                UPDATE errors 
                SET last_seen = ?, severity = ?, reason = ?, details = ?
                WHERE error_key = ? AND acknowledged = 0
            ''', (now, severity, reason, details_json, error_key))
            
            # Check if severity escalated
            if old_severity == 'WARNING' and severity == 'CRITICAL':
                event_info['type'] = 'escalated'
                event_info['needs_notification'] = True
        else:
            # Insert new error
            cursor.execute('''
                INSERT INTO errors 
                (error_key, category, severity, reason, details, first_seen, last_seen)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            ''', (error_key, category, severity, reason, details_json, now, now))
            
            event_info['type'] = 'new'
            event_info['needs_notification'] = True
        
        # ─── Auto-suppress: if the category has a non-default setting, ───
        # auto-dismiss immediately so the user never sees it as active.
        # Exception: CRITICAL CPU temperature is never auto-suppressed.
        if not (error_key == 'cpu_temperature' and severity == 'CRITICAL'):
            setting_key = self.CATEGORY_SETTING_MAP.get(category, '')
            if setting_key:
                stored = self.get_setting(setting_key)
                if stored is not None:
                    configured_hours = int(stored)
                    if configured_hours != self.DEFAULT_SUPPRESSION_HOURS:
                        # Non-default setting found: auto-acknowledge
                        cursor.execute('''
                            UPDATE errors 
                            SET acknowledged = 1, resolved_at = ?, suppression_hours = ?
                            WHERE error_key = ? AND acknowledged = 0
                        ''', (now, configured_hours, error_key))
                        
                        if cursor.rowcount > 0:
                            self._record_event(cursor, 'auto_suppressed', error_key, {
                                'severity': severity,
                                'reason': reason,
                                'suppression_hours': configured_hours,
                                'note': 'Auto-suppressed by user settings'
                            })
                            event_info['type'] = 'auto_suppressed'
                            event_info['needs_notification'] = False
                            conn.commit()
                            conn.close()
                            return event_info
        
        # Record event
        self._record_event(cursor, event_info['type'], error_key, 
                          {'severity': severity, 'reason': reason})
        
        conn.commit()
        conn.close()
        
        return event_info
    
    def resolve_error(self, error_key: str, reason: str = 'auto-resolved'):
        """Mark an error as resolved"""
        with self._db_lock:
            return self._resolve_error_impl(error_key, reason)
    
    def _resolve_error_impl(self, error_key, reason):
        conn = self._get_conn()
        cursor = conn.cursor()
        
        now = datetime.now().isoformat()
        
        cursor.execute('''
            UPDATE errors 
            SET resolved_at = ?
            WHERE error_key = ? AND resolved_at IS NULL
        ''', (now, error_key))
        
        if cursor.rowcount > 0:
            self._record_event(cursor, 'resolved', error_key, {'reason': reason})
        
        conn.commit()
        conn.close()
    
    def is_error_active(self, error_key: str, category: Optional[str] = None) -> bool:
        """
        Check if an error is currently active (unresolved and not acknowledged).
        Used by checks to avoid re-recording errors that are already tracked.
        """
        conn = self._get_conn()
        cursor = conn.cursor()
        
        if category:
            cursor.execute('''
                SELECT COUNT(*) FROM errors 
                WHERE error_key = ? AND category = ?
                  AND resolved_at IS NULL AND acknowledged = 0
            ''', (error_key, category))
        else:
            cursor.execute('''
                SELECT COUNT(*) FROM errors 
                WHERE error_key = ? 
                  AND resolved_at IS NULL AND acknowledged = 0
            ''', (error_key,))
        
        count = cursor.fetchone()[0]
        conn.close()
        return count > 0
    
    def clear_error(self, error_key: str):
        """
        Remove/resolve a specific error immediately.
        Used when the condition that caused the error no longer exists
        (e.g., storage became available again, CPU temp recovered).
        
        For acknowledged errors: if the condition resolved on its own,
        we delete the record entirely so it can re-trigger as a fresh
        event if the condition returns later.
        """
        conn = self._get_conn()
        cursor = conn.cursor()
        
        now = datetime.now().isoformat()
        
        # Check if this error was acknowledged (dismissed)
        cursor.execute('''
            SELECT acknowledged FROM errors WHERE error_key = ?
        ''', (error_key,))
        row = cursor.fetchone()
        
        if row and row[0] == 1:
            # Dismissed error that naturally resolved - delete entirely
            # so it can re-trigger as a new event if it happens again
            cursor.execute('DELETE FROM errors WHERE error_key = ?', (error_key,))
            if cursor.rowcount > 0:
                self._record_event(cursor, 'cleared', error_key, 
                                  {'reason': 'condition_resolved_after_dismiss'})
        else:
            # Normal active error - mark as resolved
            cursor.execute('''
                UPDATE errors 
                SET resolved_at = ?
                WHERE error_key = ? AND resolved_at IS NULL
            ''', (now, error_key))
            
            if cursor.rowcount > 0:
                self._record_event(cursor, 'cleared', error_key, {'reason': 'condition_resolved'})
        
        conn.commit()
        conn.close()
    
    def acknowledge_error(self, error_key: str) -> Dict[str, Any]:
        """
        Manually acknowledge an error (dismiss).
        - Looks up the category's configured suppression duration from user settings
        - Stores suppression_hours on the error record (snapshot at dismiss time)
        - Marks as acknowledged so it won't re-appear during the suppression period
        """
        with self._db_lock:
            return self._acknowledge_error_impl(error_key)
    
    def _acknowledge_error_impl(self, error_key):
        conn = self._get_conn()
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        
        now = datetime.now().isoformat()
        
        # Get current error info before acknowledging
        cursor.execute('SELECT * FROM errors WHERE error_key = ?', (error_key,))
        row = cursor.fetchone()
        
        result = {'success': False, 'error_key': error_key}
        
        if row:
            error_dict = dict(row)
            original_severity = error_dict.get('severity', 'WARNING')
            category = error_dict.get('category', '')
            
            # Look up the user's configured suppression for this category
            setting_key = self.CATEGORY_SETTING_MAP.get(category, '')
            sup_hours = self.DEFAULT_SUPPRESSION_HOURS
            if setting_key:
                stored = self.get_setting(setting_key)
                if stored is not None:
                    try:
                        sup_hours = int(stored)
                    except (ValueError, TypeError):
                        pass
            
            cursor.execute('''
                UPDATE errors 
                SET acknowledged = 1, resolved_at = ?, suppression_hours = ?
                WHERE error_key = ?
            ''', (now, sup_hours, error_key))
            
            self._record_event(cursor, 'acknowledged', error_key, {
                'original_severity': original_severity,
                'category': category,
                'suppression_hours': sup_hours
            })
            
            result = {
                'success': True,
                'error_key': error_key,
                'original_severity': original_severity,
                'category': category,
                'acknowledged_at': now,
                'suppression_hours': sup_hours
            }
        
        conn.commit()
        conn.close()
        return result
    
    def get_active_errors(self, category: Optional[str] = None) -> List[Dict[str, Any]]:
        """Get all active (unresolved) errors, optionally filtered by category"""
        conn = self._get_conn()
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        
        if category:
            cursor.execute('''
                SELECT * FROM errors 
                WHERE resolved_at IS NULL AND category = ?
                ORDER BY severity DESC, last_seen DESC
            ''', (category,))
        else:
            cursor.execute('''
                SELECT * FROM errors 
                WHERE resolved_at IS NULL
                ORDER BY severity DESC, last_seen DESC
            ''')
        
        rows = cursor.fetchall()
        conn.close()
        
        errors = []
        for row in rows:
            error_dict = dict(row)
            if error_dict.get('details'):
                error_dict['details'] = json.loads(error_dict['details'])
            errors.append(error_dict)
        
        return errors
    
    def cleanup_old_errors(self):
        """Clean up old resolved errors and auto-resolve stale errors"""
        with self._db_lock:
            return self._cleanup_old_errors_impl()
    
    def _cleanup_old_errors_impl(self):
        conn = self._get_conn()
        cursor = conn.cursor()
        
        now = datetime.now()
        
        # Delete resolved errors older than 7 days
        cutoff_resolved = (now - timedelta(days=7)).isoformat()
        cursor.execute('DELETE FROM errors WHERE resolved_at < ?', (cutoff_resolved,))
        
        # Auto-resolve VM/CT errors older than 48h
        cutoff_vm = (now - timedelta(seconds=self.VM_ERROR_RETENTION)).isoformat()
        cursor.execute('''
            UPDATE errors 
            SET resolved_at = ?
            WHERE category = 'vms' 
              AND resolved_at IS NULL 
              AND first_seen < ?
              AND acknowledged = 0
        ''', (now.isoformat(), cutoff_vm))
        
        # Auto-resolve log errors older than 24h
        cutoff_logs = (now - timedelta(seconds=self.LOG_ERROR_RETENTION)).isoformat()
        cursor.execute('''
            UPDATE errors 
            SET resolved_at = ?
            WHERE category = 'logs' 
              AND resolved_at IS NULL 
              AND first_seen < ?
              AND acknowledged = 0
        ''', (now.isoformat(), cutoff_logs))
        
        # Delete old events (>30 days)
        cutoff_events = (now - timedelta(days=30)).isoformat()
        cursor.execute('DELETE FROM events WHERE timestamp < ?', (cutoff_events,))
        
        conn.commit()
        conn.close()
    
    def check_vm_running(self, vm_id: str) -> bool:
        """
        Check if a VM/CT is running and resolve error if so.
        Returns True if running and error was resolved.
        """
        import subprocess
        
        try:
            # Check qm status for VMs
            result = subprocess.run(
                ['qm', 'status', vm_id],
                capture_output=True,
                text=True,
                timeout=2
            )
            
            if result.returncode == 0 and 'running' in result.stdout.lower():
                self.resolve_error(f'vm_{vm_id}', 'VM started')
                return True
            
            # Check pct status for containers
            result = subprocess.run(
                ['pct', 'status', vm_id],
                capture_output=True,
                text=True,
                timeout=2
            )
            
            if result.returncode == 0 and 'running' in result.stdout.lower():
                self.resolve_error(f'ct_{vm_id}', 'Container started')
                return True
            
            return False
            
        except Exception:
            return False
    
    def get_dismissed_errors(self) -> List[Dict[str, Any]]:
        """
        Get errors that were acknowledged/dismissed but still within suppression period.
        These are shown as INFO in the frontend with a 'Dismissed' badge.
        """
        conn = self._get_conn()
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        
        cursor.execute('''
            SELECT * FROM errors 
            WHERE acknowledged = 1 AND resolved_at IS NOT NULL
            ORDER BY resolved_at DESC
        ''')
        
        rows = cursor.fetchall()
        conn.close()
        
        dismissed = []
        now = datetime.now()
        
        for row in rows:
            error_dict = dict(row)
            if error_dict.get('details'):
                try:
                    error_dict['details'] = json.loads(error_dict['details'])
                except (json.JSONDecodeError, TypeError):
                    pass
            
            # Check if still within suppression period using per-record hours
            try:
                resolved_dt = datetime.fromisoformat(error_dict['resolved_at'])
                sup_hours = error_dict.get('suppression_hours')
                if sup_hours is None:
                    sup_hours = self.DEFAULT_SUPPRESSION_HOURS
                
                error_dict['dismissed'] = True
                
                if sup_hours == -1:
                    # Permanent dismiss
                    error_dict['suppression_remaining_hours'] = -1
                    error_dict['permanent'] = True
                    dismissed.append(error_dict)
                else:
                    elapsed_seconds = (now - resolved_dt).total_seconds()
                    suppression_seconds = sup_hours * 3600
                    
                    if elapsed_seconds < suppression_seconds:
                        error_dict['suppression_remaining_hours'] = round(
                            (suppression_seconds - elapsed_seconds) / 3600, 1
                        )
                        error_dict['permanent'] = False
                        dismissed.append(error_dict)
            except (ValueError, TypeError):
                pass
        
        return dismissed
    
    def emit_event(self, event_type: str, category: str, severity: str, 
                   data: Optional[Dict] = None) -> int:
        """
        Emit a health event for the notification system.
        Returns the event ID.
        
        Event types: 
        - 'state_change': severity changed (OK->WARNING, WARNING->CRITICAL, etc.)
        - 'new_error': new error detected
        - 'resolved': error resolved
        - 'escalated': severity increased
        """
        conn = self._get_conn()
        cursor = conn.cursor()
        
        event_data = data or {}
        event_data['category'] = category
        event_data['severity'] = severity
        event_data['needs_notification'] = True
        
        cursor.execute('''
            INSERT INTO events (event_type, error_key, timestamp, data)
            VALUES (?, ?, ?, ?)
        ''', (event_type, f'{category}_{severity}', datetime.now().isoformat(), 
              json.dumps(event_data)))
        
        event_id = cursor.lastrowid
        conn.commit()
        conn.close()
        return event_id
    
    def get_pending_notifications(self) -> List[Dict[str, Any]]:
        """
        Get events that need notification (for future Telegram/Gotify integration).
        Groups by severity for batch notification sending.
        """
        conn = self._get_conn()
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        
        cursor.execute('''
            SELECT e.*, err.category as error_category, err.reason as error_reason
            FROM events e
            LEFT JOIN errors err ON e.error_key = err.error_key
            WHERE json_extract(e.data, '$.needs_notification') = 1
            ORDER BY e.timestamp DESC
            LIMIT 100
        ''')
        
        rows = cursor.fetchall()
        conn.close()
        
        events = []
        for row in rows:
            event_dict = dict(row)
            if event_dict.get('data'):
                try:
                    event_dict['data'] = json.loads(event_dict['data'])
                except (json.JSONDecodeError, TypeError):
                    pass
            events.append(event_dict)
        
        return events
    
    def mark_events_notified(self, event_ids: List[int]):
        """Mark events as notified (notification was sent successfully)"""
        if not event_ids:
            return
        
        conn = self._get_conn()
        cursor = conn.cursor()
        
        for event_id in event_ids:
            cursor.execute('''
                UPDATE events 
                SET data = json_set(COALESCE(data, '{}'), '$.needs_notification', 0, '$.notified_at', ?)
                WHERE id = ?
            ''', (datetime.now().isoformat(), event_id))
        
        conn.commit()
        conn.close()
    
    def _record_event(self, cursor, event_type: str, error_key: str, data: Dict):
        """Internal: Record an event"""
        cursor.execute('''
            INSERT INTO events (event_type, error_key, timestamp, data)
            VALUES (?, ?, ?, ?)
        ''', (event_type, error_key, datetime.now().isoformat(), json.dumps(data)))
    
    def get_unnotified_errors(self) -> List[Dict[str, Any]]:
        """Get errors that need Telegram notification"""
        conn = self._get_conn()
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        
        cursor.execute('''
            SELECT * FROM errors 
            WHERE notification_sent = 0 
              AND resolved_at IS NULL 
              AND acknowledged = 0
            ORDER BY severity DESC, first_seen ASC
        ''')
        
        rows = cursor.fetchall()
        conn.close()
        
        errors = []
        for row in rows:
            error_dict = dict(row)
            if error_dict.get('details'):
                error_dict['details'] = json.loads(error_dict['details'])
            errors.append(error_dict)
        
        return errors
    
    def mark_notified(self, error_key: str):
        """Mark error as notified"""
        conn = self._get_conn()
        cursor = conn.cursor()
        
        cursor.execute('''
            UPDATE errors 
            SET notification_sent = 1
            WHERE error_key = ?
        ''', (error_key,))
        
        conn.commit()
        conn.close()
    
    # ─── System Capabilities Cache ───────────────────────────────
    
    def get_capability(self, cap_key: str) -> Optional[str]:
        """
        Get a cached system capability value.
        Returns None if not yet detected.
        """
        conn = self._get_conn()
        cursor = conn.cursor()
        cursor.execute(
            'SELECT cap_value FROM system_capabilities WHERE cap_key = ?',
            (cap_key,)
        )
        row = cursor.fetchone()
        conn.close()
        return row[0] if row else None
    
    def set_capability(self, cap_key: str, cap_value: str):
        """Store a system capability value (detected once, cached forever)."""
        conn = self._get_conn()
        cursor = conn.cursor()
        cursor.execute('''
            INSERT OR REPLACE INTO system_capabilities (cap_key, cap_value, detected_at)
            VALUES (?, ?, ?)
        ''', (cap_key, cap_value, datetime.now().isoformat()))
        conn.commit()
        conn.close()
    
    def get_all_capabilities(self) -> Dict[str, str]:
        """Get all cached system capabilities as a dict."""
        conn = self._get_conn()
        cursor = conn.cursor()
        cursor.execute('SELECT cap_key, cap_value FROM system_capabilities')
        rows = cursor.fetchall()
        conn.close()
        return {row[0]: row[1] for row in rows}
    
    # Note: System capabilities (has_zfs, has_lvm) are now derived at runtime
    # from Proxmox storage types in health_monitor.get_detailed_status()
    # This avoids redundant subprocess calls and ensures immediate detection
    # when the user adds new ZFS/LVM storage via Proxmox.
    
    # ─── User Settings ──────────────────────────────────────────
    
    def get_setting(self, key: str, default: Optional[str] = None) -> Optional[str]:
        """Get a user setting value by key."""
        conn = self._get_conn()
        cursor = conn.cursor()
        cursor.execute(
            'SELECT setting_value FROM user_settings WHERE setting_key = ?', (key,)
        )
        row = cursor.fetchone()
        conn.close()
        return row[0] if row else default
    
    def set_setting(self, key: str, value: str):
        """Store a user setting value."""
        with self._db_lock:
            conn = self._get_conn()
            cursor = conn.cursor()
            cursor.execute('''
                INSERT OR REPLACE INTO user_settings (setting_key, setting_value, updated_at)
                VALUES (?, ?, ?)
            ''', (key, value, datetime.now().isoformat()))
            conn.commit()
            conn.close()
    
    def get_all_settings(self, prefix: Optional[str] = None) -> Dict[str, str]:
        """Get all user settings, optionally filtered by key prefix."""
        conn = self._get_conn()
        cursor = conn.cursor()
        if prefix:
            cursor.execute(
                'SELECT setting_key, setting_value FROM user_settings WHERE setting_key LIKE ?',
                (f'{prefix}%',)
            )
        else:
            cursor.execute('SELECT setting_key, setting_value FROM user_settings')
        rows = cursor.fetchall()
        conn.close()
        return {row[0]: row[1] for row in rows}
    
    def sync_dismissed_suppression(self):
        """
        Retroactively update all existing dismissed errors to match current
        user settings. Called when the user saves settings, so changes are
        effective immediately on already-dismissed items.
        
        For each dismissed error, looks up its category's configured hours
        and updates the suppression_hours column to match.
        """
        conn = self._get_conn()
        cursor = conn.cursor()
        
        # Build reverse map: category -> setting_key
        cat_to_setting = {v['category']: k 
                          for k, v in self._get_category_labels().items()}
        
        # Get all current suppression settings
        current_settings = self.get_all_settings('suppress_')
        
        # Get all dismissed (acknowledged) errors
        cursor.execute('''
            SELECT id, error_key, category, suppression_hours
            FROM errors WHERE acknowledged = 1
        ''')
        dismissed = cursor.fetchall()
        
        updated_count = 0
        for err_id, error_key, category, old_hours in dismissed:
            setting_key = None
            for skey, meta in self._get_category_labels().items():
                if meta['category'] == category:
                    setting_key = skey
                    break
            
            if not setting_key:
                continue
            
            stored = current_settings.get(setting_key)
            new_hours = int(stored) if stored else self.DEFAULT_SUPPRESSION_HOURS
            
            if new_hours != old_hours:
                cursor.execute(
                    'UPDATE errors SET suppression_hours = ? WHERE id = ?',
                    (new_hours, err_id)
                )
                self._record_event(cursor, 'suppression_updated', error_key, {
                    'old_hours': old_hours,
                    'new_hours': new_hours,
                    'reason': 'settings_sync'
                })
                updated_count += 1
        
        conn.commit()
        conn.close()
        return updated_count
    
    def _get_category_labels(self) -> dict:
        """Internal helper for category label metadata."""
        return {
            'suppress_cpu': {'label': 'CPU Usage & Temperature', 'category': 'temperature', 'icon': 'cpu'},
            'suppress_memory': {'label': 'Memory & Swap', 'category': 'memory', 'icon': 'memory'},
            'suppress_storage': {'label': 'Storage Mounts & Space', 'category': 'storage', 'icon': 'storage'},
            'suppress_disks': {'label': 'Disk I/O & Errors', 'category': 'disks', 'icon': 'disk'},
            'suppress_network': {'label': 'Network Interfaces', 'category': 'network', 'icon': 'network'},
            'suppress_vms': {'label': 'VMs & Containers', 'category': 'vms', 'icon': 'vms'},
            'suppress_pve_services': {'label': 'PVE Services', 'category': 'pve_services', 'icon': 'services'},
            'suppress_logs': {'label': 'System Logs', 'category': 'logs', 'icon': 'logs'},
            'suppress_updates': {'label': 'System Updates', 'category': 'updates', 'icon': 'updates'},
            'suppress_security': {'label': 'Security & Certificates', 'category': 'security', 'icon': 'security'},
        }
    
    def get_suppression_categories(self) -> List[Dict[str, Any]]:
        """
        Get all health categories with their current suppression settings.
        Used by the settings page to render the per-category configuration.
        """
        category_labels = self._get_category_labels()
        current_settings = self.get_all_settings('suppress_')
        
        result = []
        for key, meta in category_labels.items():
            stored = current_settings.get(key)
            hours = int(stored) if stored else self.DEFAULT_SUPPRESSION_HOURS
            result.append({
                'key': key,
                'label': meta['label'],
                'category': meta['category'],
                'icon': meta['icon'],
                'hours': hours,
            })
        
        return result
    
    def get_custom_suppressions(self) -> List[Dict[str, Any]]:
        """
        Get only categories with non-default suppression settings.
        Used by the health modal to show a summary of custom suppressions.
        """
        all_cats = self.get_suppression_categories()
        return [c for c in all_cats if c['hours'] != self.DEFAULT_SUPPRESSION_HOURS]
    
    def record_unknown_persistent(self, category: str, reason: str):
        """
        Record a persistent UNKNOWN event when a health check has been
        unable to verify for >= 3 consecutive cycles (~15 min).
        Avoids duplicates by only recording once per 30 min per category.
        """
        with self._db_lock:
            self._record_unknown_persistent_impl(category, reason)
    
    def _record_unknown_persistent_impl(self, category, reason):
        try:
            event_key = f'unknown_persistent_{category}'
            now = datetime.now().isoformat()
            
            conn = self._get_conn()
            cursor = conn.cursor()
            
            # Check if we already recorded this within the last 30 minutes
            # Note: events table has columns (id, event_type, error_key, timestamp, data)
            # We use error_key for deduplication since it contains the category
            cursor.execute('''
                SELECT MAX(timestamp) FROM events 
                WHERE event_type = ? AND error_key = ?
            ''', ('unknown_persistent', event_key))
            row = cursor.fetchone()
            if row and row[0]:
                try:
                    last_recorded = datetime.fromisoformat(row[0])
                    if (datetime.now() - last_recorded).total_seconds() < 1800:
                        conn.close()
                        return  # Already recorded recently
                except (ValueError, TypeError):
                    pass  # If timestamp is malformed, proceed with recording
            
            cursor.execute('''
                INSERT INTO events (event_type, error_key, timestamp, data)
                VALUES (?, ?, ?, ?)
            ''', ('unknown_persistent', event_key, now, 
                  json.dumps({'category': category, 'reason': reason})))
            
            conn.commit()
            conn.close()
        except Exception as e:
            print(f"[HealthPersistence] Error recording UNKNOWN persistent: {e}")


# Global instance
health_persistence = HealthPersistence()