diff --git a/scripts/telegram-notifier.sh b/scripts/telegram-notifier.sh index 9e42369..811b294 100644 --- a/scripts/telegram-notifier.sh +++ b/scripts/telegram-notifier.sh @@ -2,6 +2,11 @@ #set -x + + + + + # Configuration ============================================ REPO_URL="https://raw.githubusercontent.com/MacRimi/ProxMenux/main" BASE_DIR="/usr/local/share/proxmenux" @@ -18,9 +23,24 @@ initialize_cache CONFIG_FILE="/etc/proxmox-telegram.conf" PID_DIR="/var/run/proxmox-telegram" WRAPPER_PATH="/usr/local/bin/telegram-notifier-wrapper.sh" +t() { translate "$1"; } +declare -A IFACE_DOWN +declare -A IFACE_DOWN_TIME +disk_full_detected=false +disk_nearly_full_detected=false +inode_full_detected=false +cpu_usage_history="" +last_cpu_sustained_notification=0 +last_swap_notification=0 -# Crear archivo de configuración si no existe + + +# ================================================== +# TELEGRAM +# ================================================== + +# Create configuration file if it doesn't exist if [[ ! -f "$CONFIG_FILE" ]]; then cat < "$CONFIG_FILE" BOT_TOKEN="" @@ -55,474 +75,62 @@ ram_high=0 temp_high=0 low_disk_space=0 EOF - chmod 600 "$CONFIG_FILE" # Proteger el archivo de configuración + chmod 600 "$CONFIG_FILE" fi -# Leer configuración actual source "$CONFIG_FILE" -# Función para enviar notificaciones a Telegram (sin logging) + +######################################################################## + + send_notification() { local message="$1" - # Si el token o chat ID están vacíos → No enviar nada + if [[ -z "$BOT_TOKEN" || -z "$CHAT_ID" ]]; then return 1 fi - # Enviar notificación a Telegram (sin output) + curl -s -X POST "https://api.telegram.org/bot$BOT_TOKEN/sendMessage" \ -d "chat_id=$CHAT_ID" \ -d "text=$message" > /dev/null 2>&1 } -# Opciones para el menú -options=( - "VM y Contenedor|Inicio de VM/Contenedor|vm_start" - "VM y Contenedor|Apagado de VM/Contenedor|vm_shutdown" - "VM y Contenedor|Reinicio de VM/Contenedor|vm_restart" - "VM y Contenedor|Error en inicio de VM/Contenedor|vm_fail" - "Sistema|Nueva actualización disponible|update_available" - "Sistema|Actualización completada|update_complete" - "Sistema|Apagado del sistema|system_shutdown" - "Sistema|Problema con el sistema|system_problem" - "Sistema|Carga del sistema alta|system_load_high" - "Sistema|Kernel Panic|kernel_panic" - "Almacenamiento|Fallo de disco|disk_fail" - "Almacenamiento|Almacenamiento lleno|disk_full" - "Almacenamiento|Problemas de lectura/escritura|disk_io_error" - "Clúster|Nodo desconectado|node_disconnect" - "Clúster|Split-brain (conflicto en quorum)|split_brain" - "Red|Interfaz de red caída|network_down" - "Red|Saturación de red|network_saturation" - "Red|Problema con el firewall|firewall_issue" - "Backup y Snapshot|Backup completado|backup_complete" - "Backup y Snapshot|Backup fallido|backup_fail" - "Backup y Snapshot|Snapshot completado|snapshot_complete" - "Backup y Snapshot|Snapshot fallido|snapshot_fail" - "Seguridad|Intento de autenticación fallido|auth_fail" - "Seguridad|Bloqueos automáticos de IP|ip_block" - "Seguridad|Cambio de permisos de usuario|user_permission_change" - "Recursos|Uso alto de CPU|cpu_high" - "Recursos|Uso alto de RAM|ram_high" - "Recursos|Temperatura alta del sistema|temp_high" - "Recursos|Bajo espacio en disco|low_disk_space" -) -# Función para obtener el nombre de una VM/CT a partir de su ID -get_vm_name() { - local vmid="$1" - local name="" - - if [[ -f "/etc/pve/qemu-server/$vmid.conf" ]]; then - name=$(grep -i "^name:" "/etc/pve/qemu-server/$vmid.conf" | cut -d ' ' -f2-) - elif [[ -f "/etc/pve/lxc/$vmid.conf" ]]; then - name=$(grep -i "^hostname:" "/etc/pve/lxc/$vmid.conf" | cut -d ' ' -f2-) - fi - - # Siempre devolver el nombre y el ID en el mismo formato - if [[ -n "$name" ]]; then - echo "$name ($vmid)" - else - echo "$vmid" - fi -} -# Función para configurar notificaciones -configure_notifications() { - # Ordenar las opciones por categoría y descripción - IFS=$'\n' sorted_options=($(for option in "${options[@]}"; do - IFS='|' read -r category description var_name <<< "$option" - printf "%s|%s|%s\n" "$category" "$description" "$var_name" - done | sort -t'|' -k1,1 -k2,2)) - unset IFS +######################################################################### - # Crear un mapeo de índices a nombres de variables - declare -A index_to_var - index=1 - # Preparar las opciones para `whiptail` - menu_items=() - for option in "${sorted_options[@]}"; do - IFS='|' read -r category description var_name <<< "$option" - # Guardar el mapeo de índice a nombre de variable - index_to_var["$index"]="$var_name" - - # Formatear descripción con espacios para alinear la categoría a la derecha - formatted_item="$description" - current_length=${#formatted_item} - spaces_needed=$((50 - current_length)) - - for ((j = 0; j < spaces_needed; j++)); do - formatted_item+=" " - done - - formatted_item+="$category" - - # Marcar como ON u OFF según la configuración guardada - state="OFF" - [[ "$(eval echo \$$var_name)" -eq 1 ]] && state="ON" - - menu_items+=("$index" "$formatted_item" "$state") - ((index++)) - done - - # Mostrar menú `whiptail` - selected_indices=$(whiptail --title "$(translate "Configuración de Notificaciones a Telegram")" \ - --checklist --separate-output \ - "\n$(translate "Selecciona los eventos que deseas recibir:")\n" \ - 30 100 20 \ - "${menu_items[@]}" \ - 3>&1 1>&2 2>&3) - - local result=$? - - # Si se ha seleccionado alguna opción, actualizar configuración - if [[ $result -eq 0 ]]; then - # Hacer una copia de seguridad del archivo de configuración - cp "$CONFIG_FILE" "${CONFIG_FILE}.bak" 2>/dev/null - - # Establecer todas las opciones en OFF - for var_name in "${index_to_var[@]}"; do - sed -i "s/^$var_name=.*/$var_name=0/" "$CONFIG_FILE" - done - - # Activar las opciones seleccionadas - for selected_index in $selected_indices; do - var_name="${index_to_var[$selected_index]}" - sed -i "s/^$var_name=.*/$var_name=1/" "$CONFIG_FILE" - done - - # Recargar la configuración - source "$CONFIG_FILE" - - whiptail --title "$(translate "Éxito")" \ - --msgbox "$(translate "Configuración actualizada correctamente.")" 10 70 - fi -} - -# Función: captura eventos desde journalctl -capture_journal_events() { - # Usar un archivo para almacenar eventos ya procesados - local processed_events_file="$PID_DIR/processed_events" - - # Crear directorio si no existe - mkdir -p "$PID_DIR" 2>/dev/null - - # Crear el archivo si no existe - if [[ ! -f "$processed_events_file" ]]; then - touch "$processed_events_file" - fi - - # Monitorear continuamente el log - while true; do - # Usar tail para el archivo de tareas de Proxmox - tail -F /var/log/pve/tasks/index 2>/dev/null | while read -r line; do - # Crear un identificador único para este evento - event_id=$(echo "$line" | md5sum | cut -d' ' -f1) - - # Verificar si ya procesamos este evento - if grep -q "$event_id" "$processed_events_file" 2>/dev/null; then - continue - fi - - # Añadir el ID al archivo de eventos procesados - echo "$event_id" >> "$processed_events_file" - - # Limitar el tamaño del archivo de eventos procesados - tail -n 1000 "$processed_events_file" > "${processed_events_file}.tmp" 2>/dev/null && mv "${processed_events_file}.tmp" "$processed_events_file" 2>/dev/null - - # Variable para controlar si el evento ya fue procesado por algún patrón - local event_processed=false - - # ===== EVENTOS CRÍTICOS (INMEDIATOS) ===== - - # Error al iniciar VM (CRÍTICO) - if [[ "$line" =~ "Failed to start VM" ]] && [[ "$vm_fail" -eq 1 ]] && [[ "$event_processed" = false ]]; then - VM_ID=$(echo "$line" | grep -oP 'VM \K[0-9]+') - NAME=$(get_vm_name "$VM_ID") - send_notification "🚨 $(translate "CRÍTICO: Error al iniciar la VM:") $NAME" - event_processed=true - fi - - # Errores de I/O de disco (CRÍTICO) - if [[ "$disk_io_error" -eq 1 ]] && [[ "$event_processed" = false ]]; then - if [[ "$line" =~ "I/O error" || "$line" =~ "read error" || "$line" =~ "write error" || "$line" =~ "blk_update_request" || "$line" =~ "buffer I/O error" ]]; then - DISK=$(echo "$line" | grep -oE "/dev/[a-zA-Z0-9]+" || echo "desconocido") - send_notification "🚨 $(translate "CRÍTICO: Error de lectura/escritura en disco:") $DISK" - event_processed=true - fi - fi - - # Fallo de disco (CRÍTICO) - if [[ "$disk_fail" -eq 1 ]] && [[ "$line" =~ "disk failure" ]] && [[ "$event_processed" = false ]]; then - DISK=$(echo "$line" | grep -oE "/dev/[a-zA-Z0-9]+" || echo "desconocido") - send_notification "🚨 $(translate "CRÍTICO: Fallo de disco detectado:") $DISK" - event_processed=true - fi - - # Snapshot fallido (CRÍTICO) - if [[ "$line" =~ "snapshot" ]] && [[ "$snapshot_fail" -eq 1 ]] && [[ "$line" =~ "error" ]] && [[ "$event_processed" = false ]]; then - VM_ID=$(echo "$line" | grep -oP 'TASK \K[0-9]+' || echo "") - if [[ -n "$VM_ID" ]]; then - NAME=$(get_vm_name "$VM_ID") - send_notification "🚨 $(translate "CRÍTICO: Snapshot fallido para:") $NAME" - else - send_notification "🚨 $(translate "CRÍTICO: Snapshot fallido")" - fi - event_processed=true - fi - - # Backup fallido (CRÍTICO) - if [[ "$line" =~ "backup" ]] && [[ "$backup_fail" -eq 1 ]] && [[ "$line" =~ "error" ]] && [[ "$event_processed" = false ]]; then - VM_ID=$(echo "$line" | grep -oP 'TASK \K[0-9]+' || echo "") - if [[ -n "$VM_ID" ]]; then - NAME=$(get_vm_name "$VM_ID") - send_notification "🚨 $(translate "CRÍTICO: Backup fallido para:") $NAME" - else - send_notification "🚨 $(translate "CRÍTICO: Backup fallido")" - fi - event_processed=true - fi - - # Intento de autenticación fallido (CRÍTICO) - if [[ "$line" =~ "authentication failure" ]] && [[ "$auth_fail" -eq 1 ]] && [[ "$event_processed" = false ]]; then - USER=$(echo "$line" | grep -oP 'user=\K[^ ]+' || echo "desconocido") - IP=$(echo "$line" | grep -oP 'rhost=\K[^ ]+' || echo "desconocida") - send_notification "🚨 $(translate "CRÍTICO: Intento de autenticación fallido:") $USER desde $IP" - event_processed=true - fi - - # Problema con el firewall (CRÍTICO) - if [[ "$line" =~ "firewall" ]] && [[ "$firewall_issue" -eq 1 ]] && [[ "$line" =~ "error|block|reject" ]] && [[ "$event_processed" = false ]]; then - send_notification "🚨 $(translate "CRÍTICO: Problema con el firewall:") $line" - event_processed=true - fi - - # Interfaz de red caída (CRÍTICO) - if [[ "$line" =~ "network" ]] && [[ "$network_down" -eq 1 ]] && [[ "$line" =~ "down" ]] && [[ "$event_processed" = false ]]; then - IFACE=$(echo "$line" | grep -oP 'interface \K[^ ]+' || echo "desconocida") - send_notification "🚨 $(translate "CRÍTICO: Interfaz de red caída:") $IFACE" - event_processed=true - fi - - # Split-brain detectado (CRÍTICO) - if [[ "$line" =~ "Split-Brain" ]] && [[ "$split_brain" -eq 1 ]] && [[ "$event_processed" = false ]]; then - send_notification "🚨 $(translate "CRÍTICO: Split-brain detectado en el clúster")" - event_processed=true - fi - - # Nodo desconectado del clúster (CRÍTICO) - if [[ "$line" =~ "quorum" ]] && [[ "$node_disconnect" -eq 1 ]] && [[ "$line" =~ "lost" ]] && [[ "$event_processed" = false ]]; then - NODE=$(echo "$line" | grep -oP 'node \K[^ ]+' || echo "desconocido") - send_notification "🚨 $(translate "CRÍTICO: Nodo desconectado del clúster:") $NODE" - event_processed=true - fi - - # Kernel panic (CRÍTICO) - if [[ "$line" =~ "kernel panic" ]] && [[ "$kernel_panic" -eq 1 ]] && [[ "$event_processed" = false ]]; then - send_notification "🚨 $(translate "CRÍTICO: Kernel panic detectado")" - event_processed=true - fi - - # ===== EVENTOS NO CRÍTICOS (INMEDIATOS) ===== - - # Inicio de VM (NO CRÍTICO pero inmediato) - if [[ "$line" =~ "qmstart" ]] && [[ "$vm_start" -eq 1 ]] && [[ "$event_processed" = false ]]; then - VM_ID=$(echo "$line" | grep -oP 'qmstart:\K[0-9]+') - NAME=$(get_vm_name "$VM_ID") - send_notification "✅ $(translate "VM comenzó con éxito:") $NAME" - event_processed=true - fi - - # Apagado de VM (NO CRÍTICO pero inmediato) - if [[ "$line" =~ "qmstop" ]] && [[ "$vm_shutdown" -eq 1 ]] && [[ "$event_processed" = false ]]; then - VM_ID=$(echo "$line" | grep -oP 'qmstop:\K[0-9]+') - NAME=$(get_vm_name "$VM_ID") - send_notification "✅ $(translate "VM se detuvo con éxito:") $NAME" - event_processed=true - fi - - # Reinicio de VM (NO CRÍTICO pero inmediato) - if [[ "$line" =~ "qmreset" || "$line" =~ "qmreboot" ]] && [[ "$vm_restart" -eq 1 ]] && [[ "$event_processed" = false ]]; then - VM_ID=$(echo "$line" | grep -oP '(qmreset|qmreboot):\K[0-9]+') - NAME=$(get_vm_name "$VM_ID") - send_notification "✅ $(translate "VM reinició con éxito:") $NAME" - event_processed=true - fi - - # Snapshot completado (NO CRÍTICO pero inmediato) - if [[ "$line" =~ "snapshot" ]] && [[ "$snapshot_complete" -eq 1 ]] && [[ ! "$line" =~ "error" ]] && [[ "$event_processed" = false ]]; then - VM_ID=$(echo "$line" | grep -oP 'TASK \K[0-9]+' || echo "") - if [[ -n "$VM_ID" ]]; then - NAME=$(get_vm_name "$VM_ID") - send_notification "✅ $(translate "Snapshot completado para:") $NAME" - else - send_notification "✅ $(translate "Snapshot completado")" - fi - event_processed=true - fi - - # Backup completado (NO CRÍTICO pero inmediato) - if [[ "$line" =~ "backup" ]] && [[ "$backup_complete" -eq 1 ]] && [[ "$line" =~ "successful" ]] && [[ "$event_processed" = false ]]; then - VM_ID=$(echo "$line" | grep -oP 'TASK \K[0-9]+' || echo "") - if [[ -n "$VM_ID" ]]; then - NAME=$(get_vm_name "$VM_ID") - send_notification "✅ $(translate "Backup completado para:") $NAME" - else - send_notification "✅ $(translate "Backup completado")" - fi - event_processed=true - fi - - # Actualización completada (NO CRÍTICO pero inmediato) - if [[ "$line" =~ "update" ]] && [[ "$update_complete" -eq 1 ]] && [[ "$line" =~ "complete" ]] && [[ "$event_processed" = false ]]; then - send_notification "✅ $(translate "Actualización del sistema completada")" - event_processed=true - fi - done - - # Si llegamos aquí, es porque tail -F terminó inesperadamente - sleep 5 - done -} - -# Función: captura eventos directos del sistema -capture_direct_events() { - # Variables para controlar la frecuencia de las notificaciones - local last_load_notification=0 - local last_temp_notification=0 - local last_disk_space_notification=0 - local last_cpu_notification=0 - local last_ram_notification=0 - local last_update_notification=0 - - # Tiempo mínimo entre notificaciones repetitivas (en segundos) - local resource_interval=900 # 15 minutos para recursos - local update_interval=86400 # 24 horas para actualizaciones - - # Variables para eventos CRÍTICOS (sin intervalo) - local disk_full_detected=false - - while true; do - current_time=$(date +%s) - - # ===== EVENTOS CRÍTICOS (INMEDIATOS) ===== - - # Disco lleno (CRÍTICO - inmediato) - if [[ "$disk_full" -eq 1 ]]; then - full_disks=$(df -h | awk '$5 == "100%" {print $1 " (100% lleno)"}') - if [[ -n "$full_disks" && "$disk_full_detected" = false ]]; then - send_notification "🚨 $(translate "CRÍTICO: Almacenamiento completamente lleno:") $full_disks" - disk_full_detected=true - elif [[ -z "$full_disks" ]]; then - disk_full_detected=false - fi - fi - - # ===== EVENTOS NO CRÍTICOS (CON INTERVALO) ===== - - # Carga alta del sistema (NO CRÍTICO - con intervalo) - if [[ "$system_load_high" -eq 1 ]]; then - load=$(awk '{print $1}' /proc/loadavg) - if (( $(echo "$load > 5.00" | bc -l) )) && (( current_time - last_load_notification > resource_interval )); then - send_notification "⚠️ $(translate "Carga alta del sistema detectada:") $load" - last_load_notification=$current_time - fi - fi - - # Actualizaciones disponibles (NO CRÍTICO - con intervalo diario) - if [[ "$update_available" -eq 1 ]] && (( current_time - last_update_notification > update_interval )); then - if command -v apt-get &>/dev/null; then - apt-get update -qq &>/dev/null - updates=$(apt list --upgradable 2>/dev/null | grep -v "Listing..." | wc -l) - if [[ $updates -gt 0 ]]; then - send_notification "ℹ️ $(translate "Actualizaciones disponibles:") $updates" - last_update_notification=$current_time - fi - fi - fi - - # Espacio en disco bajo (NO CRÍTICO - con intervalo) - if [[ "$low_disk_space" -eq 1 ]] && (( current_time - last_disk_space_notification > resource_interval )); then - # Comprobar particiones con más del 90% pero menos del 100% de uso - low_space=$(df -h | awk '$5 ~ /9[0-9]%/ && $5 != "100%" {print $1 " (" $5 " lleno)"}') - if [[ -n "$low_space" ]]; then - send_notification "⚠️ $(translate "Espacio en disco bajo:") $low_space" - last_disk_space_notification=$current_time - fi - fi - - # Uso alto de CPU (NO CRÍTICO - con intervalo) - if [[ "$cpu_high" -eq 1 ]] && (( current_time - last_cpu_notification > resource_interval )); then - # Usar mpstat si está disponible, si no, usar top - if command -v mpstat &>/dev/null; then - cpu_usage=$(mpstat 1 1 | awk '/Average:/ {print 100 - $NF}') - else - cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2 + $4}') - fi - - if (( $(echo "$cpu_usage > 90" | bc -l) )); then - send_notification "⚠️ $(translate "Uso alto de CPU:") $cpu_usage%" - last_cpu_notification=$current_time - fi - fi - - # Uso alto de RAM (NO CRÍTICO - con intervalo) - if [[ "$ram_high" -eq 1 ]] && (( current_time - last_ram_notification > resource_interval )); then - ram_usage=$(free | awk '/Mem:/ {printf "%.2f", $3/$2 * 100}') - if (( $(echo "$ram_usage > 90" | bc -l) )); then - send_notification "⚠️ $(translate "Uso alto de RAM:") $ram_usage%" - last_ram_notification=$current_time - fi - fi - - # Temperatura alta (NO CRÍTICO - con intervalo) - if [[ "$temp_high" -eq 1 ]] && (( current_time - last_temp_notification > resource_interval )); then - if command -v sensors &>/dev/null; then - # Intentar diferentes patrones para detectar la temperatura - temp=$(sensors | grep -E 'Package id 0:|Core 0:|CPU:' | head -1 | grep -oP '\+\d+\.\d+°C' | grep -oP '\d+\.\d+' || echo "0") - - if [[ -n "$temp" && "$temp" != "0" ]] && (( $(echo "$temp > 80" | bc -l) )); then - send_notification "⚠️ $(translate "Temperatura alta del sistema:") $temp°C" - last_temp_notification=$current_time - fi - fi - fi - - # Pausa entre comprobaciones - sleep 30 - done -} - -# Función para configurar Telegram +# Function to configure Telegram configure_telegram() { - # Cargar configuración existente (si existe) + [[ -f "$CONFIG_FILE" ]] && source "$CONFIG_FILE" - # Solicitar BOT_TOKEN - BOT_TOKEN=$(whiptail --title "$(translate "Configuración de Telegram")" \ - --inputbox "$(translate "Introduce tu Token de Bot de Telegram:")" 10 70 "$BOT_TOKEN" 3>&1 1>&2 2>&3) + + BOT_TOKEN=$(whiptail --title "$(translate "Telegram Configuration")" \ + --inputbox "$(translate "Enter your Telegram Bot Token:")" 10 70 "$BOT_TOKEN" 3>&1 1>&2 2>&3) if [[ $? -ne 0 ]]; then return fi - # Solicitar CHAT_ID - CHAT_ID=$(whiptail --title "$(translate "Configuración de Telegram")" \ - --inputbox "$(translate "Introduce tu ID de Chat de Telegram:")" 10 70 "$CHAT_ID" 3>&1 1>&2 2>&3) + + CHAT_ID=$(whiptail --title "$(translate "Telegram Configuration")" \ + --inputbox "$(translate "Enter your Telegram Chat ID:")" 10 70 "$CHAT_ID" 3>&1 1>&2 2>&3) if [[ $? -ne 0 ]]; then return fi - # Guardar configuración en archivo + # Save configuration to file if [[ -n "$BOT_TOKEN" && -n "$CHAT_ID" ]]; then - # Hacer una copia de seguridad del archivo de configuración + cp "$CONFIG_FILE" "${CONFIG_FILE}.bak" 2>/dev/null - # Verificar si las variables ya existen en el archivo + if grep -q "^BOT_TOKEN=" "$CONFIG_FILE"; then sed -i "s/^BOT_TOKEN=.*/BOT_TOKEN=\"$BOT_TOKEN\"/" "$CONFIG_FILE" else @@ -535,55 +143,1677 @@ configure_telegram() { echo "CHAT_ID=\"$CHAT_ID\"" >> "$CONFIG_FILE" fi - # Recargar la configuración + source "$CONFIG_FILE" - # Probar la configuración inmediatamente + # Test the configuration immediately response=$(curl -s -X POST "https://api.telegram.org/bot$BOT_TOKEN/sendMessage" \ -d "chat_id=$CHAT_ID" \ - -d "text=$(translate "¡Telegram está funcionando correctamente!")") + -d "text=$(translate "Telegram is working correctly!")") if [[ "$response" =~ "ok\":true" ]]; then - whiptail --title "$(translate "Éxito")" \ - --msgbox "$(translate "Configuración de Telegram válida. Las notificaciones serán enviadas.")" 10 70 + whiptail --title "$(translate "Success")" \ + --msgbox "$(translate "Valid Telegram configuration. Notifications will be sent.")" 10 70 else whiptail --title "$(translate "Error")" \ - --msgbox "$(translate "Configuración de Telegram inválida. Por favor, verifica el token y el ID de chat.")" 10 70 + --msgbox "$(translate "Invalid Telegram configuration. Please verify the token and chat ID.")" 10 70 fi else whiptail --title "$(translate "Error")" \ - --msgbox "$(translate "Configuración de Telegram incompleta. Por favor, proporciona tanto el token como el ID de chat.")" 10 70 + --msgbox "$(translate "Incomplete Telegram configuration. Please provide both token and chat ID.")" 10 70 fi } +# ================================================== + + + + + +# ================================================== +# NOTIFICATION CONFIGURATION +# ================================================== + + + +# Options for the menu +options=( + "VM and Container|$(t 'VM/Container Start')|vm_start" + "VM and Container|$(t 'VM/Container Shutdown')|vm_shutdown" + "VM and Container|$(t 'VM/Container Restart')|vm_restart" + "VM and Container|$(t 'VM/Container Start Failure')|vm_fail" + "System|$(t 'New update available')|update_available" + "System|$(t 'Update completed')|update_complete" + "System|$(t 'System shutdown')|system_shutdown" + "System|$(t 'System problem')|system_problem" + "System|$(t 'High system load')|system_load_high" + "Storage|$(t 'Disk failure')|disk_fail" + "Storage|$(t 'Storage full')|disk_full" + "Storage|$(t 'Read/Write issues')|disk_io_error" + "Cluster|$(t 'Node disconnected')|node_disconnect" + "Cluster|$(t 'Split-brain (quorum conflict)')|split_brain" + "Network|$(t 'Network interface down')|network_down" + "Network|$(t 'Network saturation')|network_saturation" + "Network|$(t 'Firewall issue')|firewall_issue" + "Backup and Snapshot|$(t 'Backup completed')|backup_complete" + "Backup and Snapshot|$(t 'Backup failed')|backup_fail" + "Backup and Snapshot|$(t 'Snapshot completed')|snapshot_complete" + "Backup and Snapshot|$(t 'Snapshot failed')|snapshot_fail" + "Security|$(t 'Failed authentication attempt')|auth_fail" + "Security|$(t 'Automatic IP blocks')|ip_block" + "Security|$(t 'User permission change')|user_permission_change" + "Resources|$(t 'High CPU usage')|cpu_high" + "Resources|$(t 'High RAM usage')|ram_high" + "Resources|$(t 'High system temperature')|temp_high" + "Resources|$(t 'Low disk space')|low_disk_space" +) + + + + + + +# Function to configure notifications +configure_notifications() { + + + IFS=$'\n' sorted_options=($(for option in "${options[@]}"; do + IFS='|' read -r category description var_name <<< "$option" + printf "%s|%s|%s\n" "$category" "$description" "$var_name" + done | sort -t'|' -k1,1 -k2,2)) + unset IFS + + + declare -A index_to_var + index=1 + + + menu_items=() + for option in "${sorted_options[@]}"; do + IFS='|' read -r category description var_name <<< "$option" + + + index_to_var["$index"]="$var_name" + + + formatted_item="$description" + current_length=${#formatted_item} + spaces_needed=$((50 - current_length)) + + for ((j = 0; j < spaces_needed; j++)); do + formatted_item+=" " + done + + formatted_item+="$category" + + + state="OFF" + [[ "$(eval echo \$$var_name)" -eq 1 ]] && state="ON" + + menu_items+=("$index" "$formatted_item" "$state") + ((index++)) + done + + # whiptail menu + selected_indices=$(whiptail --backtitle "ProxMenuX" --title "$(translate "Telegram Notification Configuration")" \ + --checklist --separate-output \ + "\n$(translate "Select the events you want to receive:")\n" \ + 30 100 20 \ + "${menu_items[@]}" \ + 3>&1 1>&2 2>&3) + + local result=$? + + + if [[ $result -eq 0 ]]; then + + cp "$CONFIG_FILE" "${CONFIG_FILE}.bak" 2>/dev/null + + for var_name in "${index_to_var[@]}"; do + sed -i "s/^$var_name=.*/$var_name=0/" "$CONFIG_FILE" + done + + for selected_index in $selected_indices; do + var_name="${index_to_var[$selected_index]}" + sed -i "s/^$var_name=.*/$var_name=1/" "$CONFIG_FILE" + done + + + source "$CONFIG_FILE" + + whiptail --backtitle "ProxMenuX" --title "$(translate "Success")" \ + --msgbox "$(translate "Configuration updated successfully.")" 10 70 + fi +} + + + + + +# ================================================== + + + + +# Function to get VM/CT name from its ID +get_vm_name() { + local vmid="$1" + local name="" + + if [[ -f "/etc/pve/qemu-server/$vmid.conf" ]]; then + name=$(grep -i "^name:" "/etc/pve/qemu-server/$vmid.conf" | cut -d ' ' -f2-) + elif [[ -f "/etc/pve/lxc/$vmid.conf" ]]; then + name=$(grep -i "^hostname:" "/etc/pve/lxc/$vmid.conf" | cut -d ' ' -f2-) + fi + + + if [[ -n "$name" ]]; then + echo "$name ($vmid)" + else + echo "$vmid" + fi +} + + +# ================================================== + + + + + +# ================================================== +# NOTIFICATION EVENTS +# ================================================== + + + + +# Function: capture events from journalctl +capture_journal_events() { + + + local processed_events_file="$PID_DIR/processed_events" + + + mkdir -p "$PID_DIR" 2>/dev/null + + + if [[ ! -f "$processed_events_file" ]]; then + touch "$processed_events_file" + fi + + + while true; do + + # Use tail for Proxmox tasks file + tail -F /var/log/pve/tasks/index 2>/dev/null | while read -r line; do + + event_id=$(echo "$line" | md5sum | cut -d' ' -f1) + + if grep -q "$event_id" "$processed_events_file" 2>/dev/null; then + continue + fi + + + echo "$event_id" >> "$processed_events_file" + + tail -n 1000 "$processed_events_file" > "${processed_events_file}.tmp" 2>/dev/null && mv "${processed_events_file}.tmp" "$processed_events_file" 2>/dev/null + + local event_processed=false + + + + + # ===== IMMEDIATE NOTIFICATION EVENTS ===== + + # VM or CT start failure (CRITICAL) + if [[ "$vm_fail" -eq 1 ]] && [[ "$event_processed" = false ]]; then + # Detect VM errors + if [[ "$line" =~ "Failed to start VM" || "$line" =~ "qmstart" && "$line" =~ "err" || "$line" =~ "qmstart" && "$line" =~ "fail" ]]; then + VM_ID=$(echo "$line" | grep -oP '(VM |qmstart:)\K[0-9]+') + NAME=$(get_vm_name "$VM_ID") + send_notification "🚨 $(translate "CRITICAL: Failed to start VM:") $NAME" + event_processed=true + # Detect CT (LXC) errors + elif [[ "$line" =~ "Failed to start CT" || "$line" =~ "lxc-start" && "$line" =~ "err" || "$line" =~ "lxc-start" && "$line" =~ "fail" ]]; then + CT_ID=$(echo "$line" | grep -oP '(CT |lxc-start:)\K[0-9]+') + NAME=$(get_vm_name "$CT_ID") + send_notification "🚨 $(translate "CRITICAL: Failed to start Container:") $NAME" + event_processed=true + fi + fi + + + + # Disk I/O errors (CRITICAL) + if [[ "$disk_io_error" -eq 1 ]] && [[ "$event_processed" = false ]]; then + if [[ "$line" =~ "I/O error" || "$line" =~ "read error" || "$line" =~ "write error" || + "$line" =~ "blk_update_request" || "$line" =~ "buffer I/O error" || + "$line" =~ "medium error" || "$line" =~ "sense key: Medium Error" || + "$line" =~ "ata.*failed command" || "$line" =~ "SCSI error" ]]; then + + # Extract device name with improved pattern matching + DISK=$(echo "$line" | grep -oE "/dev/[a-zA-Z0-9]+" || + echo "$line" | grep -oE "sd[a-z][0-9]*" || + echo "$line" | grep -oE "nvme[0-9]+n[0-9]+" || + echo "unknown") + + # Try to extract error type + ERROR_TYPE="unknown" + if [[ "$line" =~ "read error" ]]; then + ERROR_TYPE="read" + elif [[ "$line" =~ "write error" ]]; then + ERROR_TYPE="write" + elif [[ "$line" =~ "medium error" || "$line" =~ "sense key: Medium Error" ]]; then + ERROR_TYPE="medium" + elif [[ "$line" =~ "timeout" ]]; then + ERROR_TYPE="timeout" + fi + + # Try to extract sector information if available + SECTOR=$(echo "$line" | grep -oP "sector [0-9]+" || echo "") + if [[ -n "$SECTOR" ]]; then + SECTOR=" ($SECTOR)" + fi + + # Send notification with enhanced information + send_notification "🚨 $(translate "CRITICAL: Disk ${ERROR_TYPE} error on:") $DISK$SECTOR" + + event_processed=true + fi + fi + + + + # Disk failure (CRITICAL) + if [[ "$disk_fail" -eq 1 ]] && [[ "$event_processed" = false ]]; then + if [[ "$line" =~ "disk failure" || "$line" =~ "hard drive failure" || + "$line" =~ "SMART error" || "$line" =~ "SMART failure" || + "$line" =~ "SMART Status BAD" || "$line" =~ "failed SMART" || + "$line" =~ "drive failure" || "$line" =~ "bad sectors" || + "$line" =~ "sector reallocation" || "$line" =~ "uncorrectable error" || + "$line" =~ "media error" || "$line" =~ "not responding" && "$line" =~ "disk" || + "$line" =~ "SSD life critical" || "$line" =~ "SSD wear" && "$line" =~ "critical" ]]; then + + # Extract device name with improved pattern matching + DISK=$(echo "$line" | grep -oE "/dev/[a-zA-Z0-9]+" || + echo "$line" | grep -oE "sd[a-z][0-9]*" || + echo "$line" | grep -oE "nvme[0-9]+n[0-9]+" || + echo "$line" | grep -oE "ata[0-9]+" || + echo "unknown") + + # Try to determine failure type + FAILURE_TYPE="hardware" + if [[ "$line" =~ "SMART" ]]; then + FAILURE_TYPE="SMART" + + # Try to extract SMART attribute if available + SMART_ATTR=$(echo "$line" | grep -oP "Attribute \K[^:]*" || + echo "$line" | grep -oP "SMART attribute \K[^:]*" || + echo "") + if [[ -n "$SMART_ATTR" ]]; then + SMART_ATTR=" (Attribute: $SMART_ATTR)" + fi + elif [[ "$line" =~ "bad sectors" || "$line" =~ "sector reallocation" ]]; then + FAILURE_TYPE="bad sectors" + elif [[ "$line" =~ "SSD" ]]; then + FAILURE_TYPE="SSD wear" + elif [[ "$line" =~ "not responding" ]]; then + FAILURE_TYPE="unresponsive" + fi + + # Send notification with enhanced information + send_notification "🚨 $(translate "CRITICAL: Disk failure detected") ($FAILURE_TYPE): $DISK$SMART_ATTR" + + event_processed=true + fi + fi + + + + # Snapshot failed (CRITICAL) + if [[ "$line" =~ "snapshot" ]] && [[ "$snapshot_fail" -eq 1 ]] && [[ "$line" =~ "error" || "$line" =~ "fail" || "$line" =~ "unable to" || "$line" =~ "cannot" ]] && [[ "$event_processed" = false ]]; then + # Extract VM/CT ID with improved pattern matching + VM_ID=$(echo "$line" | grep -oP 'TASK \K[0-9]+' || + echo "$line" | grep -oP 'VM \K[0-9]+' || + echo "$line" | grep -oP 'CT \K[0-9]+' || echo "") + + # Try to extract snapshot name/ID if available + SNAPSHOT_ID=$(echo "$line" | grep -oP 'snapshot \K[a-zA-Z0-9_-]+' || + echo "$line" | grep -oP 'snap\K[a-zA-Z0-9_-]+' || echo "") + + # Try to determine error reason + ERROR_REASON="" + if [[ "$line" =~ "no space" || "$line" =~ "space exhausted" || "$line" =~ "out of space" ]]; then + ERROR_REASON=" (No space left)" + elif [[ "$line" =~ "timeout" ]]; then + ERROR_REASON=" (Operation timed out)" + elif [[ "$line" =~ "already exists" ]]; then + ERROR_REASON=" (Snapshot already exists)" + elif [[ "$line" =~ "locked" || "$line" =~ "lock" ]]; then + ERROR_REASON=" (Resource locked)" + elif [[ "$line" =~ "permission" ]]; then + ERROR_REASON=" (Permission denied)" + elif [[ "$line" =~ "quorum" ]]; then + ERROR_REASON=" (Quorum error)" + fi + + # Format the notification message + if [[ -n "$VM_ID" ]]; then + NAME=$(get_vm_name "$VM_ID") + if [[ -n "$SNAPSHOT_ID" ]]; then + send_notification "🚨 $(translate "CRITICAL: Snapshot failed for:") $NAME (ID: $SNAPSHOT_ID)$ERROR_REASON" + else + send_notification "🚨 $(translate "CRITICAL: Snapshot failed for:") $NAME$ERROR_REASON" + fi + else + if [[ -n "$SNAPSHOT_ID" ]]; then + send_notification "🚨 $(translate "CRITICAL: Snapshot failed") (ID: $SNAPSHOT_ID)$ERROR_REASON" + else + send_notification "🚨 $(translate "CRITICAL: Snapshot failed")$ERROR_REASON" + fi + fi + + event_processed=true + fi + + + + # Backup failed (CRITICAL) + if [[ "$backup_fail" -eq 1 ]] && [[ "$event_processed" = false ]]; then + # Expanded pattern matching for backup failures + if [[ "$line" =~ "backup" && ("$line" =~ "error" || "$line" =~ "fail" || "$line" =~ "unable to" || "$line" =~ "cannot" || "$line" =~ "abort") ]]; then + # Extract VM/CT ID with improved pattern matching + VM_ID=$(echo "$line" | grep -oP 'TASK \K[0-9]+' || + echo "$line" | grep -oP 'VM \K[0-9]+' || + echo "$line" | grep -oP 'CT \K[0-9]+' || echo "") + + # Try to extract backup storage/target if available + BACKUP_TARGET=$(echo "$line" | grep -oP 'to ["\047]?\K[a-zA-Z0-9_-]+' || + echo "$line" | grep -oP 'storage ["\047]?\K[a-zA-Z0-9_-]+' || echo "") + + # Try to determine error reason + ERROR_REASON="" + if [[ "$line" =~ "no space" || "$line" =~ "space exhausted" || "$line" =~ "out of space" ]]; then + ERROR_REASON=" (No space left)" + elif [[ "$line" =~ "timeout" ]]; then + ERROR_REASON=" (Operation timed out)" + elif [[ "$line" =~ "connection" && "$line" =~ "refused" ]]; then + ERROR_REASON=" (Connection refused)" + elif [[ "$line" =~ "network" ]]; then + ERROR_REASON=" (Network error)" + elif [[ "$line" =~ "permission" ]]; then + ERROR_REASON=" (Permission denied)" + elif [[ "$line" =~ "locked" || "$line" =~ "lock" ]]; then + ERROR_REASON=" (Resource locked)" + elif [[ "$line" =~ "quorum" ]]; then + ERROR_REASON=" (Quorum error)" + elif [[ "$line" =~ "already running" ]]; then + ERROR_REASON=" (Another backup is already running)" + fi + + # Format the notification message + if [[ -n "$VM_ID" ]]; then + NAME=$(get_vm_name "$VM_ID") + if [[ -n "$BACKUP_TARGET" ]]; then + send_notification "🚨 $(translate "CRITICAL: Backup failed for:") $NAME (Target: $BACKUP_TARGET)$ERROR_REASON" + else + send_notification "🚨 $(translate "CRITICAL: Backup failed for:") $NAME$ERROR_REASON" + fi + else + if [[ -n "$BACKUP_TARGET" ]]; then + send_notification "🚨 $(translate "CRITICAL: Backup failed") (Target: $BACKUP_TARGET)$ERROR_REASON" + else + send_notification "🚨 $(translate "CRITICAL: Backup failed")$ERROR_REASON" + fi + fi + + event_processed=true + fi + fi + + + + # Failed authentication attempt (CRITICAL) + if [[ "$auth_fail" -eq 1 ]] && [[ "$event_processed" = false ]]; then + if [[ "$line" =~ "authentication failure" || "$line" =~ "auth fail" || "$line" =~ "login failed" || + "$line" =~ "Failed password" || "$line" =~ "Invalid user" || "$line" =~ "failed login" || + "$line" =~ "authentication error" || "$line" =~ "unauthorized" && "$line" =~ "access" ]]; then + + # Extract username with improved pattern matching + USER=$(echo "$line" | grep -oP 'user=\K[^ ]+' || + echo "$line" | grep -oP 'user \K[^ ]+' || + echo "$line" | grep -oP 'for user \K[^ ]+' || + echo "$line" | grep -oP 'for invalid user \K[^ ]+' || + echo "$line" | grep -oP 'for \K[^ ]+' | grep -v "invalid" || + echo "unknown") + + # Extract IP address with improved pattern matching + IP=$(echo "$line" | grep -oP 'rhost=\K[^ ]+' || + echo "$line" | grep -oP 'from \K[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' || + echo "$line" | grep -oP 'from \K[0-9a-f:]+' || + echo "$line" | grep -oP 'IP: \K[^ ]+' || + echo "unknown") + + # Try to determine authentication service + SERVICE="system" + if [[ "$line" =~ "sshd" ]]; then + SERVICE="SSH" + elif [[ "$line" =~ "pvedaemon" || "$line" =~ "pveproxy" ]]; then + SERVICE="Proxmox Web UI" + elif [[ "$line" =~ "nginx" || "$line" =~ "apache" ]]; then + SERVICE="Web Server" + elif [[ "$line" =~ "smtp" || "$line" =~ "mail" ]]; then + SERVICE="Mail" + elif [[ "$line" =~ "ftp" ]]; then + SERVICE="FTP" + fi + + # Try to extract authentication method if available + AUTH_METHOD="" + if [[ "$line" =~ "password" ]]; then + AUTH_METHOD=" (Password auth)" + elif [[ "$line" =~ "publickey" ]]; then + AUTH_METHOD=" (Public key auth)" + elif [[ "$line" =~ "keyboard-interactive" ]]; then + AUTH_METHOD=" (Interactive auth)" + elif [[ "$line" =~ "PAM" ]]; then + AUTH_METHOD=" (PAM auth)" + fi + + # Count failed attempts from this IP if possible + ATTEMPT_COUNT="" + if [[ -n "$IP" && "$IP" != "unknown" ]]; then + # Use journalctl to count recent failed attempts from this IP + if command -v journalctl &>/dev/null; then + COUNT=$(journalctl -q --since "1 hour ago" | grep -c "$IP") + if [[ $COUNT -gt 1 ]]; then + ATTEMPT_COUNT=" ($COUNT attempts in the last hour)" + fi + fi + fi + + # Send notification with enhanced information + send_notification "🚨 $(translate "CRITICAL: Failed authentication attempt:") $USER from $IP - $SERVICE$AUTH_METHOD$ATTEMPT_COUNT" + + event_processed=true + fi + fi + + + + # Firewall issue (CRITICAL) + if [[ "$line" =~ "firewall" ]] && [[ "$firewall_issue" -eq 1 ]] && [[ "$line" =~ "error" || "$line" =~ "block" || "$line" =~ "reject" || + "$line" =~ "drop" || "$line" =~ "denied" || "$line" =~ "fail" || "$line" =~ "invalid" ]] && [[ "$event_processed" = false ]]; then + + # Try to determine the type of firewall issue + ISSUE_TYPE="issue" + if [[ "$line" =~ "error" ]]; then + ISSUE_TYPE="configuration error" + elif [[ "$line" =~ "block" || "$line" =~ "denied" ]]; then + ISSUE_TYPE="blocked connection" + elif [[ "$line" =~ "reject" ]]; then + ISSUE_TYPE="rejected connection" + elif [[ "$line" =~ "drop" ]]; then + ISSUE_TYPE="dropped packet" + elif [[ "$line" =~ "invalid" ]]; then + ISSUE_TYPE="invalid rule" + fi + + # Try to extract source IP if available + SRC_IP=$(echo "$line" | grep -oP 'SRC=\K[0-9.]+' || + echo "$line" | grep -oP 'from \K[0-9.]+' || + echo "$line" | grep -oP 'source \K[0-9.]+' || echo "") + + # Try to extract destination IP if available + DST_IP=$(echo "$line" | grep -oP 'DST=\K[0-9.]+' || + echo "$line" | grep -oP 'to \K[0-9.]+' || + echo "$line" | grep -oP 'destination \K[0-9.]+' || echo "") + + # Try to extract port information if available + PORT_INFO="" + SRC_PORT=$(echo "$line" | grep -oP 'SPT=\K[0-9]+' || echo "") + DST_PORT=$(echo "$line" | grep -oP 'DPT=\K[0-9]+' || echo "") + if [[ -n "$SRC_PORT" && -n "$DST_PORT" ]]; then + PORT_INFO=" (Port $SRC_PORT → $DST_PORT)" + elif [[ -n "$DST_PORT" ]]; then + PORT_INFO=" (Port $DST_PORT)" + fi + + # Try to extract protocol if available + PROTO=$(echo "$line" | grep -oP 'PROTO=\K[A-Za-z]+' || + echo "$line" | grep -oP 'protocol \K[A-Za-z]+' || echo "") + if [[ -n "$PROTO" ]]; then + PROTO=" $PROTO" + fi + + # Try to extract interface if available + IFACE=$(echo "$line" | grep -oP 'IN=\K[^ ]+' || + echo "$line" | grep -oP 'OUT=\K[^ ]+' || + echo "$line" | grep -oP 'on \K[^ ]+' || echo "") + if [[ -n "$IFACE" ]]; then + IFACE=" on $IFACE" + fi + + # Format the notification message + if [[ -n "$SRC_IP" && -n "$DST_IP" ]]; then + send_notification "🚨 $(translate "CRITICAL: Firewall ${ISSUE_TYPE}:") $SRC_IP → $DST_IP$PORT_INFO$PROTO$IFACE" + elif [[ -n "$SRC_IP" ]]; then + send_notification "🚨 $(translate "CRITICAL: Firewall ${ISSUE_TYPE}:") from $SRC_IP$PORT_INFO$PROTO$IFACE" + elif [[ -n "$DST_IP" ]]; then + send_notification "🚨 $(translate "CRITICAL: Firewall ${ISSUE_TYPE}:") to $DST_IP$PORT_INFO$PROTO$IFACE" + else + # Extract a more concise message from the line + CONCISE_MSG=$(echo "$line" | sed -E 's/.*firewall[^:]*: ?//i' | cut -c 1-100) + send_notification "🚨 $(translate "CRITICAL: Firewall ${ISSUE_TYPE}:") $CONCISE_MSG" + fi + + event_processed=true + fi + + + + + # Network interface recovery handler + if [[ "$network_down" -eq 1 ]] && [[ "$event_processed" = false ]]; then + if [[ "$line" =~ (eth[0-9]+|eno[0-9]+|enp[0-9]+s[0-9]+|wlan[0-9]+) ]]; then + IFACE="${BASH_REMATCH[1]}" + + # Detect interface going down + if [[ "$line" =~ "link down" || "$line" =~ "disconnected" || "$line" =~ "no carrier" || "$line" =~ "failure" ]]; then + # Mark interface as down and store the timestamp + IFACE_DOWN["$IFACE"]=true + IFACE_DOWN_TIME["$IFACE"]="$(date +%s)" + fi + + # Detect interface recovery + if [[ "$line" =~ "link up" || "$line" =~ "activated" ]]; then + if [[ "${IFACE_DOWN[$IFACE]}" == true ]]; then + RESTORE_TIME=$(date +%s) + START_TIME=${IFACE_DOWN_TIME[$IFACE]} + DURATION=$((RESTORE_TIME - START_TIME)) + + # Check if this is the default route interface + PRIMARY="" + if ip route | grep -q "default.*$IFACE"; then + PRIMARY=" (PRIMARY INTERFACE)" + fi + + # Send notification after connection is restored + send_notification "$(translate '✅ Network connection was lost and has been restored on') $IFACE$PRIMARY. $(translate 'Downtime duration'): ${DURATION}s" + + # Clean up the interface state + unset IFACE_DOWN["$IFACE"] + unset IFACE_DOWN_TIME["$IFACE"] + event_processed=true + fi + fi + fi + fi + + + + + # Split-brain detected (CRITICAL) + if [[ "$split_brain" -eq 1 ]] && [[ "$event_processed" = false ]]; then + # Expanded pattern matching for split-brain detection + if [[ "$line" =~ "Split-Brain" || "$line" =~ "split brain" || "$line" =~ "split-brain" || + "$line" =~ "fencing" && "$line" =~ "required" || + "$line" =~ "cluster" && "$line" =~ "partition" ]]; then + + # Try to extract affected nodes if available + NODES=$(echo "$line" | grep -oP 'nodes: \K[^.]+' || + echo "$line" | grep -oP 'between \K[^.]+' || echo "") + + if [[ -n "$NODES" ]]; then + NODES=" (Affected nodes: $NODES)" + fi + + # Try to extract fence status if available + FENCE_INFO="" + if [[ "$line" =~ "fencing" ]]; then + if [[ "$line" =~ "successful" ]]; then + FENCE_INFO=" (Fencing successful)" + elif [[ "$line" =~ "failed" ]]; then + FENCE_INFO=" (Fencing failed)" + else + FENCE_INFO=" (Fencing required)" + fi + fi + + # Send notification with enhanced information + send_notification "🚨 $(translate "CRITICAL: Split-brain detected in cluster")$NODES$FENCE_INFO - $(translate "Manual intervention required!")" + + event_processed=true + fi + fi + + + + # Node disconnected from cluster (CRITICAL) + if [[ "$node_disconnect" -eq 1 ]] && [[ "$event_processed" = false ]]; then + # Expanded pattern matching for node disconnection + if [[ ("$line" =~ "quorum" && "$line" =~ "lost") || + ("$line" =~ "node" && "$line" =~ "left") || + ("$line" =~ "node" && "$line" =~ "offline") || + ("$line" =~ "connection" && "$line" =~ "lost" && "$line" =~ "node") ]]; then + + # Extract node name with improved pattern matching + NODE=$(echo "$line" | grep -oP 'node \K[^ ,.]+' || + echo "$line" | grep -oP 'Node \K[^ ,.]+' || + echo "$line" | grep -oP 'from \K[^ ,.]+' || echo "unknown") + + # Try to determine if quorum is still valid + QUORUM_STATUS="" + if [[ "$line" =~ "quorum" ]]; then + if [[ "$line" =~ "lost" ]]; then + QUORUM_STATUS=" (Quorum lost)" + elif [[ "$line" =~ "still" && "$line" =~ "valid" ]]; then + QUORUM_STATUS=" (Quorum still valid)" + fi + fi + + # Try to extract remaining nodes count if available + REMAINING="" + REMAINING_COUNT=$(echo "$line" | grep -oP 'remaining nodes: \K[0-9]+' || + echo "$line" | grep -oP 'nodes left: \K[0-9]+' || echo "") + if [[ -n "$REMAINING_COUNT" ]]; then + REMAINING=" ($REMAINING_COUNT nodes remaining)" + fi + + # Try to determine if this is expected or unexpected + EXPECTED="" + if [[ "$line" =~ "shutdown" || "$line" =~ "maintenance" ]]; then + EXPECTED=" (Planned)" + else + EXPECTED=" (Unexpected)" + fi + + # Send notification with enhanced information + send_notification "🚨 $(translate "CRITICAL: Node disconnected from cluster:") $NODE$QUORUM_STATUS$REMAINING$EXPECTED" + + event_processed=true + fi + fi + + + + + # ===== NON-CRITICAL EVENTS (IMMEDIATE) ===== + + # VM/CT start (NON-CRITICAL but immediate) + if [[ "$vm_start" -eq 1 ]] && [[ "$event_processed" = false ]]; then + # VM start detection + if [[ "$line" =~ "qmstart" && ! "$line" =~ "err" && ! "$line" =~ "fail" ]]; then + VM_ID=$(echo "$line" | grep -oP 'qmstart:\K[0-9]+' || + echo "$line" | grep -oP 'VM \K[0-9]+' || echo "") + + if [[ -n "$VM_ID" ]]; then + NAME=$(get_vm_name "$VM_ID") + + # Try to extract additional information + EXTRA_INFO="" + + # Check if this is a template + if [[ "$line" =~ "template" ]]; then + EXTRA_INFO=" (Template)" + fi + + # Check if this is a restore or clone operation + if [[ "$line" =~ "restore" ]]; then + EXTRA_INFO=" (Restored)" + elif [[ "$line" =~ "clone" ]]; then + EXTRA_INFO=" (Cloned)" + fi + + send_notification "✅ $(translate "VM started successfully:") $NAME$EXTRA_INFO" + event_processed=true + fi + # LXC container start detection + elif [[ "$line" =~ "lxc-start" && ! "$line" =~ "err" && ! "$line" =~ "fail" ]] || + [[ "$line" =~ "Starting CT" && ! "$line" =~ "err" && ! "$line" =~ "fail" ]]; then + + CT_ID=$(echo "$line" | grep -oP 'lxc-start:\K[0-9]+' || + echo "$line" | grep -oP 'CT \K[0-9]+' || echo "") + + if [[ -n "$CT_ID" ]]; then + NAME=$(get_vm_name "$CT_ID") + + # Try to extract additional information + EXTRA_INFO="" + + # Check if this is a template + if [[ "$line" =~ "template" ]]; then + EXTRA_INFO=" (Template)" + fi + + # Check if this is a restore or clone operation + if [[ "$line" =~ "restore" ]]; then + EXTRA_INFO=" (Restored)" + elif [[ "$line" =~ "clone" ]]; then + EXTRA_INFO=" (Cloned)" + fi + + send_notification "✅ $(translate "Container started successfully:") $NAME$EXTRA_INFO" + event_processed=true + fi + fi + fi + + + + # VM/CT shutdown (NON-CRITICAL but immediate) + if [[ "$vm_shutdown" -eq 1 ]] && [[ "$event_processed" = false ]]; then + # VM shutdown detection + if [[ "$line" =~ "qmstop" && ! "$line" =~ "err" && ! "$line" =~ "fail" ]]; then + VM_ID=$(echo "$line" | grep -oP 'qmstop:\K[0-9]+' || + echo "$line" | grep -oP 'VM \K[0-9]+' || echo "") + + if [[ -n "$VM_ID" ]]; then + NAME=$(get_vm_name "$VM_ID") + + # Try to determine shutdown type + SHUTDOWN_TYPE="" + if [[ "$line" =~ "force" || "$line" =~ "kill" ]]; then + SHUTDOWN_TYPE=" (Forced)" + elif [[ "$line" =~ "suspend" ]]; then + SHUTDOWN_TYPE=" (Suspended)" + elif [[ "$line" =~ "hibernate" ]]; then + SHUTDOWN_TYPE=" (Hibernated)" + elif [[ "$line" =~ "timeout" ]]; then + SHUTDOWN_TYPE=" (Timeout)" + elif [[ "$line" =~ "acpi" ]]; then + SHUTDOWN_TYPE=" (ACPI shutdown)" + fi + + send_notification "✅ $(translate "VM stopped successfully:") $NAME$SHUTDOWN_TYPE" + event_processed=true + fi + # LXC container shutdown detection + elif [[ "$line" =~ "lxc-stop" && ! "$line" =~ "err" && ! "$line" =~ "fail" ]] || + [[ "$line" =~ "Stopping CT" && ! "$line" =~ "err" && ! "$line" =~ "fail" ]]; then + + CT_ID=$(echo "$line" | grep -oP 'lxc-stop:\K[0-9]+' || + echo "$line" | grep -oP 'CT \K[0-9]+' || echo "") + + if [[ -n "$CT_ID" ]]; then + NAME=$(get_vm_name "$CT_ID") + + # Try to determine shutdown type + SHUTDOWN_TYPE="" + if [[ "$line" =~ "force" || "$line" =~ "kill" ]]; then + SHUTDOWN_TYPE=" (Forced)" + elif [[ "$line" =~ "timeout" ]]; then + SHUTDOWN_TYPE=" (Timeout)" + fi + + send_notification "✅ $(translate "Container stopped successfully:") $NAME$SHUTDOWN_TYPE" + event_processed=true + fi + fi + fi + + + + # VM/CT restart (NON-CRITICAL but immediate) + if [[ "$vm_restart" -eq 1 ]] && [[ "$event_processed" = false ]]; then + # VM restart detection + if [[ ("$line" =~ "qmreset" || "$line" =~ "qmreboot") && ! "$line" =~ "err" && ! "$line" =~ "fail" ]]; then + VM_ID=$(echo "$line" | grep -oP '(qmreset|qmreboot):\K[0-9]+' || + echo "$line" | grep -oP 'VM \K[0-9]+' || echo "") + + if [[ -n "$VM_ID" ]]; then + NAME=$(get_vm_name "$VM_ID") + + # Try to determine restart type + RESTART_TYPE="" + if [[ "$line" =~ "qmreset" ]]; then + RESTART_TYPE=" (Hard reset)" + elif [[ "$line" =~ "force" || "$line" =~ "kill" ]]; then + RESTART_TYPE=" (Forced)" + elif [[ "$line" =~ "timeout" ]]; then + RESTART_TYPE=" (After timeout)" + elif [[ "$line" =~ "acpi" ]]; then + RESTART_TYPE=" (ACPI restart)" + fi + + send_notification "✅ $(translate "VM restarted successfully:") $NAME$RESTART_TYPE" + event_processed=true + fi + # LXC container restart detection + elif [[ "$line" =~ "lxc-restart" || "$line" =~ "Restarting CT" || + ("$line" =~ "lxc-stop" && "$line" =~ "lxc-start" && "$line" =~ "restart") ]] && + [[ ! "$line" =~ "err" && ! "$line" =~ "fail" ]]; then + + CT_ID=$(echo "$line" | grep -oP 'lxc-restart:\K[0-9]+' || + echo "$line" | grep -oP 'CT \K[0-9]+' || + echo "$line" | grep -oP 'lxc-(stop|start):\K[0-9]+' || echo "") + + if [[ -n "$CT_ID" ]]; then + NAME=$(get_vm_name "$CT_ID") + + # Try to determine restart type + RESTART_TYPE="" + if [[ "$line" =~ "force" || "$line" =~ "kill" ]]; then + RESTART_TYPE=" (Forced)" + elif [[ "$line" =~ "timeout" ]]; then + RESTART_TYPE=" (After timeout)" + fi + + send_notification "✅ $(translate "Container restarted successfully:") $NAME$RESTART_TYPE" + event_processed=true + fi + fi + fi + + + + # Snapshot completed (NON-CRITICAL but immediate) + if [[ "$line" =~ "snapshot" ]] && [[ "$snapshot_complete" -eq 1 ]] && [[ ! "$line" =~ "error" ]] && [[ "$event_processed" = false ]]; then + # Additional pattern matching for completed snapshots + if [[ "$line" =~ "complete" || "$line" =~ "finished" || "$line" =~ "success" || ! "$line" =~ "fail" && ! "$line" =~ "unable" ]]; then + + # Extract VM/CT ID with improved pattern matching + VM_ID=$(echo "$line" | grep -oP 'TASK \K[0-9]+' || + echo "$line" | grep -oP 'VM \K[0-9]+' || + echo "$line" | grep -oP 'CT \K[0-9]+' || echo "") + + # Try to extract snapshot name/ID if available + SNAPSHOT_NAME=$(echo "$line" | grep -oP 'snapshot \K[a-zA-Z0-9_-]+' || + echo "$line" | grep -oP 'snap\K[a-zA-Z0-9_-]+' || + echo "$line" | grep -oP 'name: \K[a-zA-Z0-9_-]+' || echo "") + + # Try to extract snapshot size if available + SNAPSHOT_SIZE=$(echo "$line" | grep -oP 'size: \K[0-9.]+[KMGT]B' || + echo "$line" | grep -oP '[0-9.]+[KMGT]B' || echo "") + + # Try to extract duration if available + DURATION=$(echo "$line" | grep -oP 'duration: \K[0-9.]+s' || + echo "$line" | grep -oP 'in \K[0-9.]+s' || + echo "$line" | grep -oP 'took \K[0-9.]+s' || echo "") + + # Format additional information + ADDITIONAL_INFO="" + if [[ -n "$SNAPSHOT_NAME" ]]; then + ADDITIONAL_INFO+=" (Name: $SNAPSHOT_NAME" + + if [[ -n "$SNAPSHOT_SIZE" ]]; then + ADDITIONAL_INFO+=", Size: $SNAPSHOT_SIZE" + fi + + if [[ -n "$DURATION" ]]; then + ADDITIONAL_INFO+=", Duration: $DURATION" + fi + + ADDITIONAL_INFO+=")" + elif [[ -n "$SNAPSHOT_SIZE" || -n "$DURATION" ]]; then + ADDITIONAL_INFO+=" (" + + if [[ -n "$SNAPSHOT_SIZE" ]]; then + ADDITIONAL_INFO+="Size: $SNAPSHOT_SIZE" + + if [[ -n "$DURATION" ]]; then + ADDITIONAL_INFO+=", " + fi + fi + + if [[ -n "$DURATION" ]]; then + ADDITIONAL_INFO+="Duration: $DURATION" + fi + + ADDITIONAL_INFO+=")" + fi + + # Try to determine snapshot type + SNAPSHOT_TYPE="" + if [[ "$line" =~ "memory" || "$line" =~ "ram" ]]; then + SNAPSHOT_TYPE=" (With RAM)" + elif [[ "$line" =~ "disk-only" ]]; then + SNAPSHOT_TYPE=" (Disk only)" + fi + + # Format the notification message + if [[ -n "$VM_ID" ]]; then + NAME=$(get_vm_name "$VM_ID") + send_notification "✅ $(translate "Snapshot completed for:") $NAME$ADDITIONAL_INFO$SNAPSHOT_TYPE" + else + send_notification "✅ $(translate "Snapshot completed")$ADDITIONAL_INFO$SNAPSHOT_TYPE" + fi + + event_processed=true + fi + fi + + + + # Backup completed (NON-CRITICAL but immediate) + if [[ "$line" =~ "backup" ]] && [[ "$backup_complete" -eq 1 ]] && [[ "$line" =~ "successful" || "$line" =~ "complete" || "$line" =~ "finished" || "$line" =~ "success" ]] && [[ ! "$line" =~ "error" ]] && [[ ! "$line" =~ "fail" ]] && [[ "$event_processed" = false ]]; then + # Extract VM/CT ID with improved pattern matching + VM_ID=$(echo "$line" | grep -oP 'TASK \K[0-9]+' || + echo "$line" | grep -oP 'VM \K[0-9]+' || + echo "$line" | grep -oP 'CT \K[0-9]+' || echo "") + + # Try to extract backup target/storage if available + BACKUP_TARGET=$(echo "$line" | grep -oP 'to ["\047]?\K[a-zA-Z0-9_-]+' || + echo "$line" | grep -oP 'storage ["\047]?\K[a-zA-Z0-9_-]+' || + echo "$line" | grep -oP 'target ["\047]?\K[a-zA-Z0-9_-]+' || echo "") + + # Try to extract backup size if available + BACKUP_SIZE=$(echo "$line" | grep -oP 'size: \K[0-9.]+[KMGT]B' || + echo "$line" | grep -oP '[0-9.]+[KMGT]B' || echo "") + + # Try to extract duration if available + DURATION=$(echo "$line" | grep -oP 'duration: \K[0-9.]+s' || + echo "$line" | grep -oP 'in \K[0-9.]+s' || + echo "$line" | grep -oP 'took \K[0-9.]+s' || echo "") + + # Try to extract compression rate if available + COMPRESSION=$(echo "$line" | grep -oP 'compression: \K[0-9.]+%' || + echo "$line" | grep -oP 'compressed: \K[0-9.]+%' || echo "") + + # Format additional information + ADDITIONAL_INFO="" + if [[ -n "$BACKUP_TARGET" || -n "$BACKUP_SIZE" || -n "$DURATION" || -n "$COMPRESSION" ]]; then + ADDITIONAL_INFO+=" (" + + if [[ -n "$BACKUP_TARGET" ]]; then + ADDITIONAL_INFO+="Target: $BACKUP_TARGET" + + if [[ -n "$BACKUP_SIZE" || -n "$DURATION" || -n "$COMPRESSION" ]]; then + ADDITIONAL_INFO+=", " + fi + fi + + if [[ -n "$BACKUP_SIZE" ]]; then + ADDITIONAL_INFO+="Size: $BACKUP_SIZE" + + if [[ -n "$DURATION" || -n "$COMPRESSION" ]]; then + ADDITIONAL_INFO+=", " + fi + fi + + if [[ -n "$DURATION" ]]; then + ADDITIONAL_INFO+="Duration: $DURATION" + + if [[ -n "$COMPRESSION" ]]; then + ADDITIONAL_INFO+=", " + fi + fi + + if [[ -n "$COMPRESSION" ]]; then + ADDITIONAL_INFO+="Compression: $COMPRESSION" + fi + + ADDITIONAL_INFO+=")" + fi + + # Try to determine backup type + BACKUP_TYPE="" + if [[ "$line" =~ "incremental" ]]; then + BACKUP_TYPE=" (Incremental)" + elif [[ "$line" =~ "differential" ]]; then + BACKUP_TYPE=" (Differential)" + elif [[ "$line" =~ "full" ]]; then + BACKUP_TYPE=" (Full)" + fi + + # Format the notification message + if [[ -n "$VM_ID" ]]; then + NAME=$(get_vm_name "$VM_ID") + send_notification "✅ $(translate "Backup completed for:") $NAME$ADDITIONAL_INFO$BACKUP_TYPE" + else + send_notification "✅ $(translate "Backup completed")$ADDITIONAL_INFO$BACKUP_TYPE" + fi + + event_processed=true + fi + + +# Function: capture direct system events +capture_direct_events() { + + # Variables to control notification frequency + local last_load_notification=0 + local last_temp_notification=0 + local last_disk_space_notification=0 + local last_cpu_notification=0 + local last_ram_notification=0 + local last_update_notification=0 + + + local resource_interval=900 # 15 minutes for resources + local update_interval=86400 # 24 hours for updates + + + local disk_full_detected=false + + while true; do + current_time=$(date +%s) + + # ===== CRITICAL IMMEDIATE NOTIFICATION EVENTS ===== + + # Disk full (CRITICAL - immediate) + if [[ "$disk_full" -eq 1 ]]; then + # Check for disks that are completely full (100%) + full_disks=$(df -h | awk '$5 == "100%" {print $1 " (100% full)"}') + + # Check for disks that are nearly full (>=95%) + nearly_full_disks=$(df -h | awk '$5 >= "95%" && $5 < "100%" {print $1 " (" $5 " full)"}') + + # Handle completely full disks + if [[ -n "$full_disks" && "$disk_full_detected" = false ]]; then + # Format the output for better readability + formatted_full_disks=$(echo "$full_disks" | tr '\n' ', ' | sed 's/,$//' | sed 's/,/, /g') + + send_notification "🚨 $(translate "CRITICAL: Storage completely full:") $formatted_full_disks" + disk_full_detected=true + + # Log the event + logger -t proxmox-notify "CRITICAL: Storage completely full: $formatted_full_disks" + elif [[ -z "$full_disks" ]]; then + disk_full_detected=false + fi + + # Handle nearly full disks (separate notification) + if [[ -n "$nearly_full_disks" && "$disk_nearly_full_detected" = false ]]; then + # Format the output for better readability + formatted_nearly_full_disks=$(echo "$nearly_full_disks" | tr '\n' ', ' | sed 's/,$//' | sed 's/,/, /g') + + send_notification "⚠️ $(translate "WARNING: Storage nearly full:") $formatted_nearly_full_disks" + disk_nearly_full_detected=true + + # Log the event + logger -t proxmox-notify "WARNING: Storage nearly full: $formatted_nearly_full_disks" + elif [[ -z "$nearly_full_disks" ]]; then + disk_nearly_full_detected=false + fi + + # Check for inode usage (sometimes disks can be full of inodes but not space) + full_inodes=$(df -i | awk '$5 >= "95%" {print $1 " (" $5 " inodes used)"}') + if [[ -n "$full_inodes" && "$inode_full_detected" = false ]]; then + # Format the output for better readability + formatted_full_inodes=$(echo "$full_inodes" | tr '\n' ', ' | sed 's/,$//' | sed 's/,/, /g') + + send_notification "⚠️ $(translate "WARNING: Inode usage critical:") $formatted_full_inodes" + inode_full_detected=true + + # Log the event + logger -t proxmox-notify "WARNING: Inode usage critical: $formatted_full_inodes" + elif [[ -z "$full_inodes" ]]; then + inode_full_detected=false + fi + fi + + + + # ===== NON-CRITICAL EVENTS WITH INTERVAL ===== + + # High system load (NON-CRITICAL - with interval) + if [[ "$system_load_high" -eq 1 ]]; then + # Get current load averages (1, 5, 15 minutes) + load_1=$(awk '{print $1}' /proc/loadavg) + load_5=$(awk '{print $2}' /proc/loadavg) + load_15=$(awk '{print $3}' /proc/loadavg) + + # Get number of CPU cores + if [[ -f /proc/cpuinfo ]]; then + cpu_cores=$(grep -c "^processor" /proc/cpuinfo) + else + # Default to 1 if we can't determine + cpu_cores=1 + fi + + # Calculate thresholds based on number of cores + warning_threshold=$(echo "$cpu_cores * 0.8" | bc -l) + critical_threshold=$(echo "$cpu_cores * 1.5" | bc -l) + + # Format load averages for display + load_info="1m: $load_1, 5m: $load_5, 15m: $load_15" + + # Check if load exceeds critical threshold + if (( $(echo "$load_1 > $critical_threshold" | bc -l) )) && + (( current_time - last_load_notification > resource_interval )); then + + # Get top processes consuming CPU + if command -v top &>/dev/null; then + top_processes=$(top -b -n 1 | head -n 12 | tail -n 5 | awk '{print $NF " (" $9 "% CPU)"}' | tr '\n' ', ' | sed 's/,$//') + process_info=" $(translate "Top processes:") $top_processes" + else + process_info="" + fi + + # Get memory usage + if [[ -f /proc/meminfo ]]; then + mem_total=$(grep "MemTotal" /proc/meminfo | awk '{print $2}') + mem_available=$(grep "MemAvailable" /proc/meminfo | awk '{print $2}') + mem_used_percent=$(echo "scale=1; 100 - ($mem_available * 100 / $mem_total)" | bc -l) + memory_info=" $(translate "Memory usage:") ${mem_used_percent}%" + else + memory_info="" + fi + + send_notification "🚨 $(translate "CRITICAL: Extremely high system load:") $load_info ($(translate "on") $cpu_cores $(translate "cores"))$memory_info$process_info" + last_load_notification=$current_time + + # Log the event + logger -t proxmox-notify "CRITICAL: Extremely high system load: $load_info" + + # Check if load exceeds warning threshold + elif (( $(echo "$load_1 > $warning_threshold" | bc -l) )) && + (( current_time - last_load_notification > resource_interval )); then + + # Get memory usage + if [[ -f /proc/meminfo ]]; then + mem_total=$(grep "MemTotal" /proc/meminfo | awk '{print $2}') + mem_available=$(grep "MemAvailable" /proc/meminfo | awk '{print $2}') + mem_used_percent=$(echo "scale=1; 100 - ($mem_available * 100 / $mem_total)" | bc -l) + memory_info=" $(translate "Memory usage:") ${mem_used_percent}%" + else + memory_info="" + fi + + send_notification "⚠️ $(translate "WARNING: High system load:") $load_info ($(translate "on") $cpu_cores $(translate "cores"))$memory_info" + last_load_notification=$current_time + + # Log the event + logger -t proxmox-notify "WARNING: High system load: $load_info" + fi + fi + + + + # Available updates (NON-CRITICAL - with daily interval) + if [[ "$update_available" -eq 1 ]] && (( current_time - last_update_notification > update_interval )); then + # Update package lists quietly + apt-get update -qq &>/dev/null + + # Count total upgradable packages + updates=$(apt list --upgradable 2>/dev/null | grep -v "Listing..." | wc -l) + + # Check for security updates specifically + security_updates=$(apt list --upgradable 2>/dev/null | grep -i security | wc -l) + + # Check for Proxmox VE updates specifically + proxmox_updates=$(apt list --upgradable 2>/dev/null | grep -E "^(proxmox-ve|pve-manager|pve-kernel|pve-container|pve-firewall|pve-ha-manager|pve-docs|pve-qemu-kvm|pve-storage|pve-cluster|pve-gui|pve-headers|pve-firmware|pve-zsync|pve-guest-common)" | wc -l) + + # Get Proxmox version information + current_pve_version=$(pveversion -v 2>/dev/null | grep -oP "pve-manager/\K[0-9]+\.[0-9]+" || echo "unknown") + + # Check if there's a new major Proxmox version available + new_pve_version="" + if [[ $proxmox_updates -gt 0 ]]; then + new_version_check=$(apt list --upgradable 2>/dev/null | grep "^pve-manager/" | grep -oP "pve-manager/\K[0-9]+\.[0-9]+" || echo "") + if [[ -n "$new_version_check" && "$new_version_check" != "$current_pve_version" ]]; then + new_pve_version="$new_version_check" + fi + fi + + # Get list of specific packages that have updates + if [[ $updates -gt 0 ]]; then + # Get a list of all upgradable packages (limited to 10 to avoid too long messages) + package_list=$(apt list --upgradable 2>/dev/null | grep -v "Listing..." | head -n 10 | awk -F/ '{print $1}' | tr '\n' ', ' | sed 's/,$//') + + # If there are more than 10 packages, indicate this + if [[ $updates -gt 10 ]]; then + package_list="$package_list, ... ($(translate "and") $((updates-10)) $(translate "more"))" + fi + + # Format the notification message + update_msg="ℹ️ $(translate "Updates available:") $updates" + + if [[ $security_updates -gt 0 ]]; then + update_msg="$update_msg ($(translate "including") $security_updates $(translate "security updates"))" + fi + + # Add package list + update_msg="$update_msg. $(translate "Packages:") $package_list" + + # If there's a new Proxmox version, highlight it + if [[ -n "$new_pve_version" ]]; then + update_msg="🔄 $(translate "NEW PROXMOX VERSION AVAILABLE:") $new_pve_version ($(translate "current:") $current_pve_version) + + $update_msg" + elif [[ $proxmox_updates -gt 0 ]]; then + update_msg="🔄 $(translate "Proxmox updates available") ($proxmox_updates $(translate "packages")) + + $update_msg" + fi + + send_notification "$update_msg" + last_update_notification=$current_time + + # Log the event + logger -t proxmox-notify "Updates available: $updates packages" + fi + fi + + + + # Low disk space (NON-CRITICAL - with interval) + if [[ "$low_disk_space" -eq 1 ]] && (( current_time - last_disk_space_notification > resource_interval )); then + # Check partitions with critical space (95-99% usage) + critical_space=$(df -h | awk '$5 ~ /9[5-9]%/ && $5 != "100%" {print $1 " (" $5 " full, " $4 " free)"}') + + # Check partitions with warning space (90-94% usage) + warning_space=$(df -h | awk '$5 ~ /9[0-4]%/ {print $1 " (" $5 " full, " $4 " free)"}') + + # Check partitions with attention space (85-89% usage) + attention_space=$(df -h | awk '$5 ~ /8[5-9]%/ {print $1 " (" $5 " full, " $4 " free)"}') + + # Format messages for better readability + if [[ -n "$critical_space" ]]; then + critical_space=$(echo "$critical_space" | tr '\n' ', ' | sed 's/,$//' | sed 's/,/, /g') + fi + + if [[ -n "$warning_space" ]]; then + warning_space=$(echo "$warning_space" | tr '\n' ', ' | sed 's/,$//' | sed 's/,/, /g') + fi + + if [[ -n "$attention_space" ]]; then + attention_space=$(echo "$attention_space" | tr '\n' ', ' | sed 's/,$//' | sed 's/,/, /g') + fi + + # Build notification message + disk_space_msg="" + + if [[ -n "$critical_space" ]]; then + disk_space_msg+="🚨 $(translate "CRITICAL: Very low disk space:") $critical_space" + fi + + if [[ -n "$warning_space" ]]; then + if [[ -n "$disk_space_msg" ]]; then + disk_space_msg+=" + + " + fi + disk_space_msg+="⚠️ $(translate "WARNING: Low disk space:") $warning_space" + fi + + if [[ -n "$attention_space" && -z "$critical_space" && -z "$warning_space" ]]; then + # Only show attention level if no higher alerts are present + disk_space_msg+="ℹ️ $(translate "ATTENTION: Disk space getting low:") $attention_space" + fi + + # Send notification if any space issues were detected + if [[ -n "$disk_space_msg" ]]; then + send_notification "$disk_space_msg" + last_disk_space_notification=$current_time + + # Log the event + logger -t proxmox-notify "Low disk space detected" + + # Suggest cleanup options for Proxmox + if [[ -d /var/lib/vz/dump || -d /var/lib/vz/template ]]; then + cleanup_msg="$(translate "TIP: Consider cleaning up old backups with:") 'rm -f /var/lib/vz/dump/vzdump-*.tar' $(translate "or old templates with:") 'rm -f /var/lib/vz/template/cache/*.tar.gz'" + send_notification "$cleanup_msg" + fi + fi + fi + + + + # High CPU usage (NON-CRITICAL - with interval) + if [[ "$cpu_high" -eq 1 ]] && (( current_time - last_cpu_notification > resource_interval )); then + # Get number of CPU cores + if [[ -f /proc/cpuinfo ]]; then + cpu_cores=$(grep -c "^processor" /proc/cpuinfo) + else + # Default to 1 if we can't determine + cpu_cores=1 + fi + + # Use mpstat if available, otherwise use top + if command -v mpstat &>/dev/null; then + cpu_usage=$(mpstat 1 1 | awk '/Average:/ {print 100 - $NF}') + else + cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2 + $4}') + fi + + # Round to one decimal place + cpu_usage=$(printf "%.1f" $cpu_usage) + + # Get CPU temperature if available + cpu_temp="" + if command -v sensors &>/dev/null; then + # Try to get CPU temperature from sensors + cpu_temp=$(sensors | grep -i "core\|temp" | grep -oP '\+\K[0-9.]+°C' | sort -nr | head -n1) + elif [[ -f /sys/class/thermal/thermal_zone0/temp ]]; then + # Alternative method using sysfs + cpu_temp=$(echo "scale=1; $(cat /sys/class/thermal/thermal_zone0/temp) / 1000" | bc -l) + cpu_temp="${cpu_temp}°C" + fi + + # Add temperature info if available + temp_info="" + if [[ -n "$cpu_temp" ]]; then + temp_info=" ($(translate "Temperature:") $cpu_temp)" + fi + + # Get top CPU consuming processes + process_info="" + if command -v top &>/dev/null; then + top_processes=$(top -bn1 -o %CPU | head -n 12 | tail -n 5 | awk '{print $NF " (" $9 "%)"}' | tr '\n' ', ' | sed 's/,$//') + process_info=" + $(translate "Top processes:") $top_processes" + fi + + # Check for critical CPU usage (>95%) + if (( $(echo "$cpu_usage > 95" | bc -l) )); then + send_notification "🚨 $(translate "CRITICAL: Very high CPU usage:") ${cpu_usage}% ($(translate "on") $cpu_cores $(translate "cores"))${temp_info}${process_info}" + last_cpu_notification=$current_time + + # Log the event + logger -t proxmox-notify "CRITICAL: Very high CPU usage: ${cpu_usage}%" + + # Check for high CPU usage (>85%) + elif (( $(echo "$cpu_usage > 85" | bc -l) )); then + send_notification "⚠️ $(translate "WARNING: High CPU usage:") ${cpu_usage}% ($(translate "on") $cpu_cores $(translate "cores"))${temp_info}${process_info}" + last_cpu_notification=$current_time + + # Log the event + logger -t proxmox-notify "WARNING: High CPU usage: ${cpu_usage}%" + fi + + # Check for sustained moderate CPU usage (>70% for extended period) + # This requires tracking previous readings + if [[ -z "$cpu_usage_history" ]]; then + cpu_usage_history="$cpu_usage" + else + cpu_usage_history="$cpu_usage_history,$cpu_usage" + + # Keep only the last 5 readings + cpu_usage_history=$(echo "$cpu_usage_history" | awk -F, '{for(i=NF-4>1?NF-4:1; i<=NF; i++) printf("%s%s", $i, i==NF?"":",") }') + + # Calculate average of last readings + cpu_usage_avg=$(echo "$cpu_usage_history" | awk -F, '{sum=0; for(i=1; i<=NF; i++) sum+=$i; print sum/NF}') + + # If average is >70% and we haven't sent a notification recently + if (( $(echo "$cpu_usage_avg > 70" | bc -l) )) && + (( current_time - last_cpu_sustained_notification > resource_interval * 3 )); then + send_notification "ℹ️ $(translate "ATTENTION: Sustained CPU usage:") ${cpu_usage_avg}% $(translate "average over time") ($(translate "on") $cpu_cores $(translate "cores"))${temp_info}" + last_cpu_sustained_notification=$current_time + + # Log the event + logger -t proxmox-notify "ATTENTION: Sustained CPU usage: ${cpu_usage_avg}%" + fi + fi + fi + + + + # High RAM usage (NON-CRITICAL - with interval) + if [[ "$ram_high" -eq 1 ]] && (( current_time - last_ram_notification > resource_interval )); then + # Get detailed memory information + total_ram=$(free -m | awk '/Mem:/ {print $2}') + used_ram=$(free -m | awk '/Mem:/ {print $3}') + free_ram=$(free -m | awk '/Mem:/ {print $4}') + shared_ram=$(free -m | awk '/Mem:/ {print $5}') + cache_ram=$(free -m | awk '/Mem:/ {print $6}') + available_ram=$(free -m | awk '/Mem:/ {print $7}') + + # Calculate percentages + ram_usage=$(echo "scale=1; ($total_ram - $available_ram) * 100 / $total_ram" | bc -l) + ram_usage_no_cache=$(echo "scale=1; ($used_ram - $cache_ram) * 100 / $total_ram" | bc -l) + + # Get swap information + total_swap=$(free -m | awk '/Swap:/ {print $2}') + used_swap=$(free -m | awk '/Swap:/ {print $3}') + + # Calculate swap percentage if swap exists + swap_info="" + if [[ $total_swap -gt 0 ]]; then + swap_percent=$(echo "scale=1; $used_swap * 100 / $total_swap" | bc -l) + swap_info=", $(translate "Swap:") ${swap_percent}% (${used_swap}MB/${total_swap}MB)" + fi + + # Format memory values for display + ram_info="${ram_usage}% (${used_ram}MB/${total_ram}MB)" + ram_info_detailed="$(translate "Used:") ${used_ram}MB, $(translate "Free:") ${free_ram}MB, $(translate "Cache:") ${cache_ram}MB, $(translate "Available:") ${available_ram}MB" + + # Get top memory consuming processes + process_info="" + if command -v ps &>/dev/null; then + top_processes=$(ps aux --sort=-%mem | head -n 6 | tail -n 5 | awk '{print $11 " (" int($4) "%)"}' | tr '\n' ', ' | sed 's/,$//') + process_info=" + $(translate "Top processes:") $top_processes" + fi + + # Check for critical RAM usage (>95%) + if (( $(echo "$ram_usage > 95" | bc -l) )); then + send_notification "🚨 $(translate "CRITICAL: Very high RAM usage:") ${ram_info}${swap_info} + ${ram_info_detailed}${process_info}" + last_ram_notification=$current_time + + # Log the event + logger -t proxmox-notify "CRITICAL: Very high RAM usage: ${ram_usage}%" + + # Check for high RAM usage (>85%) + elif (( $(echo "$ram_usage > 85" | bc -l) )); then + send_notification "⚠️ $(translate "WARNING: High RAM usage:") ${ram_info}${swap_info} + ${ram_info_detailed}${process_info}" + last_ram_notification=$current_time + + # Log the event + logger -t proxmox-notify "WARNING: High RAM usage: ${ram_usage}%" + + # Check for high RAM usage excluding cache (>80%) + # This is important because Linux uses free RAM for cache, but can free it when needed + elif (( $(echo "$ram_usage_no_cache > 80" | bc -l) )); then + send_notification "ℹ️ $(translate "ATTENTION: High RAM usage (excluding cache):") ${ram_usage_no_cache}%${swap_info} + ${ram_info_detailed}${process_info}" + last_ram_notification=$current_time + + # Log the event + logger -t proxmox-notify "ATTENTION: High RAM usage (excluding cache): ${ram_usage_no_cache}%" + fi + + # Check for high swap usage if swap exists and is being used + if [[ $total_swap -gt 0 && $used_swap -gt 0 ]]; then + # Only alert on high swap if we haven't already alerted on RAM + if (( $(echo "$swap_percent > 50" | bc -l) )) && + (( $(echo "$ram_usage <= 85" | bc -l) )) && + (( current_time - last_swap_notification > resource_interval )); then + send_notification "⚠️ $(translate "WARNING: High swap usage:") ${swap_percent}% (${used_swap}MB/${total_swap}MB) + ${ram_info_detailed}${process_info}" + last_swap_notification=$current_time + + # Log the event + logger -t proxmox-notify "WARNING: High swap usage: ${swap_percent}%" + fi + fi + fi + + + + # High temperature (NON-CRITICAL - with interval) + if [[ "$temp_high" -eq 1 ]] && (( current_time - last_temp_notification > resource_interval )); then + # Initialize variables + temp_detected=false + max_temp=0 + temp_sources="" + + # Method 1: Use 'sensors' command if available + if command -v sensors &>/dev/null; then + # Update sensors database if needed + if [[ ! -f /var/run/proxmox-notify-sensors-updated ]]; then + sensors-detect --auto &>/dev/null || true + touch /var/run/proxmox-notify-sensors-updated + fi + + # Try to get CPU temperature from various patterns + cpu_temp=$(sensors | grep -E 'Package id 0:|Core [0-9]+:|CPU:|Tdie:|Tctl:' | grep -oP '\+\K[0-9.]+°C|[0-9.]+°C' | sed 's/°C//' | sort -nr | head -n1) + + if [[ -n "$cpu_temp" && "$cpu_temp" != "0" ]]; then + temp_detected=true + if (( $(echo "$cpu_temp > $max_temp" | bc -l) )); then + max_temp=$cpu_temp + temp_sources="CPU" + fi + fi + + # Try to get motherboard/system temperature + mb_temp=$(sensors | grep -E 'MB Temperature|System Temp|Board Temp|Motherboard' | grep -oP '\+\K[0-9.]+°C|[0-9.]+°C' | sed 's/°C//' | sort -nr | head -n1) + + if [[ -n "$mb_temp" && "$mb_temp" != "0" ]]; then + temp_detected=true + if (( $(echo "$mb_temp > $max_temp" | bc -l) )); then + max_temp=$mb_temp + temp_sources="Motherboard" + elif (( $(echo "$mb_temp == $max_temp" | bc -l) )); then + temp_sources="$temp_sources, Motherboard" + fi + fi + + # Try to get GPU temperature if available + gpu_temp=$(sensors | grep -E 'GPU|VGA' | grep -oP '\+\K[0-9.]+°C|[0-9.]+°C' | sed 's/°C//' | sort -nr | head -n1) + + if [[ -n "$gpu_temp" && "$gpu_temp" != "0" ]]; then + temp_detected=true + if (( $(echo "$gpu_temp > $max_temp" | bc -l) )); then + max_temp=$gpu_temp + temp_sources="GPU" + elif (( $(echo "$gpu_temp == $max_temp" | bc -l) )); then + temp_sources="$temp_sources, GPU" + fi + fi + + # Try to get disk temperature if available + disk_temp=$(sensors | grep -E 'Drive Temp|Disk Temp|Storage Temp' | grep -oP '\+\K[0-9.]+°C|[0-9.]+°C' | sed 's/°C//' | sort -nr | head -n1) + + if [[ -n "$disk_temp" && "$disk_temp" != "0" ]]; then + temp_detected=true + if (( $(echo "$disk_temp > $max_temp" | bc -l) )); then + max_temp=$disk_temp + temp_sources="Disk" + elif (( $(echo "$disk_temp == $max_temp" | bc -l) )); then + temp_sources="$temp_sources, Disk" + fi + fi + fi + + # Method 2: Use sysfs thermal zones if sensors not available or no temp detected + if ! $temp_detected && [[ -d /sys/class/thermal ]]; then + for zone in /sys/class/thermal/thermal_zone*/temp; do + if [[ -f "$zone" ]]; then + zone_temp=$(echo "scale=1; $(cat "$zone") / 1000" | bc -l) + + if [[ -n "$zone_temp" && "$zone_temp" != "0" ]]; then + temp_detected=true + if (( $(echo "$zone_temp > $max_temp" | bc -l) )); then + max_temp=$zone_temp + # Try to get zone type + zone_dir=$(dirname "$zone") + if [[ -f "$zone_dir/type" ]]; then + zone_type=$(cat "$zone_dir/type") + temp_sources="$zone_type" + else + temp_sources="Thermal Zone" + fi + fi + fi + fi + done + fi + + # Method 3: Use ipmitool if available and no temp detected yet + if ! $temp_detected && command -v ipmitool &>/dev/null; then + ipmi_temp=$(ipmitool sdr type temperature 2>/dev/null | grep -i -E 'CPU|System|Ambient|Inlet|Exhaust' | head -1 | awk '{print $4}') + + if [[ -n "$ipmi_temp" && "$ipmi_temp" != "0" ]]; then + temp_detected=true + max_temp=$ipmi_temp + temp_sources="IPMI" + fi + fi + + # Method 4: Use hddtemp for disk temperatures if available + if command -v hddtemp &>/dev/null; then + for disk in /dev/sd[a-z]; do + if [[ -b "$disk" ]]; then + disk_temp=$(hddtemp "$disk" 2>/dev/null | grep -oP '[0-9.]+°C' | sed 's/°C//') + + if [[ -n "$disk_temp" && "$disk_temp" != "0" ]]; then + temp_detected=true + if (( $(echo "$disk_temp > $max_temp" | bc -l) )); then + max_temp=$disk_temp + disk_name=$(basename "$disk") + temp_sources="Disk $disk_name" + elif (( $(echo "$disk_temp == $max_temp" | bc -l) )); then + disk_name=$(basename "$disk") + temp_sources="$temp_sources, Disk $disk_name" + fi + fi + fi + done + fi + + # If we detected a temperature, check against thresholds + if $temp_detected && [[ -n "$max_temp" && "$max_temp" != "0" ]]; then + # Critical temperature (>90°C) + if (( $(echo "$max_temp > 90" | bc -l) )); then + send_notification "🚨 $(translate "CRITICAL: Dangerously high temperature:") ${max_temp}°C (${temp_sources})" + last_temp_notification=$current_time + + # Log the event + logger -t proxmox-notify "CRITICAL: Dangerously high temperature: ${max_temp}°C (${temp_sources})" + + # High temperature (>80°C) + elif (( $(echo "$max_temp > 80" | bc -l) )); then + send_notification "⚠️ $(translate "WARNING: High temperature:") ${max_temp}°C (${temp_sources})" + last_temp_notification=$current_time + + # Log the event + logger -t proxmox-notify "WARNING: High temperature: ${max_temp}°C (${temp_sources})" + + # Elevated temperature (>70°C) + elif (( $(echo "$max_temp > 70" | bc -l) )); then + send_notification "ℹ️ $(translate "ATTENTION: Elevated temperature:") ${max_temp}°C (${temp_sources})" + last_temp_notification=$current_time + + # Log the event + logger -t proxmox-notify "ATTENTION: Elevated temperature: ${max_temp}°C (${temp_sources})" + fi + fi + fi + + # Pause between checks + sleep 30 + done +} + + + -# Función para iniciar el servicio de notificaciones + + + +# Function to start the notification service start_notification_service() { if [[ ! -f /etc/systemd/system/proxmox-telegram.service ]]; then install_systemd_service fi if systemctl is-active --quiet proxmox-telegram.service; then - whiptail --title "$(translate "Información")" \ - --msgbox "$(translate "El servicio de notificaciones ya está en ejecución.")" 10 70 + whiptail --title "$(translate "Information")" \ + --msgbox "$(translate "The notification service is already running.")" 10 70 else systemctl start proxmox-telegram.service if systemctl is-active --quiet proxmox-telegram.service; then - whiptail --title "$(translate "Iniciado")" \ - --msgbox "$(translate "El servicio se ha iniciado correctamente.")" 10 70 + whiptail --title "$(translate "Started")" \ + --msgbox "$(translate "The service has been started successfully.")" 10 70 else whiptail --title "$(translate "Error")" \ - --msgbox "$(translate "No se pudo iniciar el servicio de notificaciones.")" 10 70 + --msgbox "$(translate "Could not start the notification service.")" 10 70 fi fi } -# Función para detener el servicio +# Function to stop the service stop_notification_service() { if [[ -f /etc/systemd/system/proxmox-telegram.service ]]; then if systemctl is-active --quiet proxmox-telegram.service; then @@ -592,32 +1822,32 @@ stop_notification_service() { fi if ! systemctl is-active --quiet proxmox-telegram.service; then - whiptail --title "$(translate "Detenido")" \ - --msgbox "$(translate "El servicio ha sido detenido correctamente.")" 10 70 + whiptail --title "$(translate "Stopped")" \ + --msgbox "$(translate "The service has been stopped successfully.")" 10 70 else whiptail --title "$(translate "Error")" \ - --msgbox "$(translate "No se pudo detener el servicio de notificaciones.")" 10 70 + --msgbox "$(translate "Could not stop the notification service.")" 10 70 fi else - whiptail --title "$(translate "Información")" \ - --msgbox "$(translate "El servicio de notificaciones no está instalado aún.")" 10 70 + whiptail --title "$(translate "Information")" \ + --msgbox "$(translate "The notification service is not installed yet.")" 10 70 fi } -# Función para verificar el estado del servicio +# Function to check service status check_service_status() { clear if [[ -f /etc/systemd/system/proxmox-telegram.service ]]; then systemctl status proxmox-telegram.service else - echo "$(translate "El servicio no está instalado.")" + echo "$(translate "The service is not installed.")" fi echo - echo "$(translate "Pulsa Enter para volver al menú...")" + msg_success "$(translate "Press Enter to return to the menu...")" read -r } -# Función para eliminar el servicio systemd +# Function to remove the systemd service remove_systemd_service() { if [[ -f /etc/systemd/system/proxmox-telegram.service ]]; then if systemctl is-active --quiet proxmox-telegram.service; then @@ -626,17 +1856,17 @@ remove_systemd_service() { systemctl disable proxmox-telegram.service rm -f /etc/systemd/system/proxmox-telegram.service systemctl daemon-reexec - whiptail --title "$(translate "Eliminado")" \ - --msgbox "$(translate "El servicio ha sido eliminado correctamente. Puedes reinstalarlo desde el menú si lo deseas.")" 10 70 + whiptail --title "$(translate "Removed")" \ + --msgbox "$(translate "The service has been removed successfully. You can reinstall it from the menu if desired.")" 10 70 else - whiptail --title "$(translate "Información")" \ - --msgbox "$(translate "El servicio no existe, no se requiere eliminar nada.")" 10 70 + whiptail --title "$(translate "Information")" \ + --msgbox "$(translate "The service does not exist, nothing to remove.")" 10 70 fi } -# Funciones requeridas por systemd +# Functions required by systemd start_silent() { mkdir -p "$PID_DIR" @@ -650,7 +1880,7 @@ start_silent() { echo $$ > "$PID_DIR/service.pid" - # Esperar que ambos procesos finalicen (mantiene vivo el servicio systemd) + # Wait for both processes to finish (keeps systemd service alive) wait $journal_pid wait $direct_pid @@ -665,10 +1895,19 @@ stop_silent() { -# Función para instalar el servicio como un servicio systemd + +# Function to install the service as a systemd service install_systemd_service() { mkdir -p "$PID_DIR" + + cat > "$WRAPPER_PATH" < /etc/systemd/system/proxmox-telegram.service <&1 1>&2 2>&3) @@ -742,4 +1981,4 @@ case "$1" in start_silent) start_silent ;; stop_silent) stop_silent ;; *) main_menu ;; -esac +esac \ No newline at end of file