#!/bin/bash # Configuration ============================================ REPO_URL="https://raw.githubusercontent.com/MacRimi/ProxMenux/main" BASE_DIR="/usr/local/share/proxmenux" UTILS_FILE="$BASE_DIR/utils.sh" VENV_PATH="/opt/googletrans-env" if [[ -f "$UTILS_FILE" ]]; then source "$UTILS_FILE" fi load_language initialize_cache CONFIG_FILE="/etc/proxmox-telegram.conf" PID_DIR="/var/run/proxmox-telegram" WRAPPER_PATH="/usr/local/bin/telegram-notifier-wrapper.sh" t() { translate "$1"; } declare -A IFACE_DOWN declare -A IFACE_DOWN_TIME disk_full_detected=false disk_nearly_full_detected=false inode_full_detected=false cpu_usage_history="" last_cpu_sustained_notification=0 last_swap_notification=0 # ================================================== # TELEGRAM # ================================================== # Create configuration file if it doesn't exist if [[ ! -f "$CONFIG_FILE" ]]; then cat < "$CONFIG_FILE" BOT_TOKEN="" CHAT_ID="" vm_start=0 vm_shutdown=0 vm_restart=0 vm_fail=0 update_available=0 update_complete=0 system_shutdown=0 system_problem=0 system_load_high=0 kernel_panic=0 disk_fail=0 disk_full=0 disk_io_error=0 node_disconnect=0 split_brain=0 network_down=0 network_saturation=0 firewall_issue=0 backup_complete=0 backup_fail=0 snapshot_complete=0 snapshot_fail=0 auth_fail=0 ip_block=0 user_permission_change=0 cpu_high=0 ram_high=0 temp_high=0 low_disk_space=0 EOF chmod 600 "$CONFIG_FILE" fi source "$CONFIG_FILE" ######################################################################## send_notification() { local message="$1" if [[ -z "$BOT_TOKEN" || -z "$CHAT_ID" ]]; then return 1 fi curl -s -X POST "https://api.telegram.org/bot$BOT_TOKEN/sendMessage" \ -d "chat_id=$CHAT_ID" \ -d "text=$message" > /dev/null 2>&1 } ######################################################################### # Function to configure Telegram configure_telegram() { [[ -f "$CONFIG_FILE" ]] && source "$CONFIG_FILE" BOT_TOKEN=$(whiptail --title "$(translate "Telegram Configuration")" \ --inputbox "$(translate "Enter your Telegram Bot Token:")" 10 70 "$BOT_TOKEN" 3>&1 1>&2 2>&3) if [[ $? -ne 0 ]]; then return fi CHAT_ID=$(whiptail --title "$(translate "Telegram Configuration")" \ --inputbox "$(translate "Enter your Telegram Chat ID:")" 10 70 "$CHAT_ID" 3>&1 1>&2 2>&3) if [[ $? -ne 0 ]]; then return fi # Save configuration to file if [[ -n "$BOT_TOKEN" && -n "$CHAT_ID" ]]; then cp "$CONFIG_FILE" "${CONFIG_FILE}.bak" 2>/dev/null if grep -q "^BOT_TOKEN=" "$CONFIG_FILE"; then sed -i "s/^BOT_TOKEN=.*/BOT_TOKEN=\"$BOT_TOKEN\"/" "$CONFIG_FILE" else echo "BOT_TOKEN=\"$BOT_TOKEN\"" >> "$CONFIG_FILE" fi if grep -q "^CHAT_ID=" "$CONFIG_FILE"; then sed -i "s/^CHAT_ID=.*/CHAT_ID=\"$CHAT_ID\"/" "$CONFIG_FILE" else echo "CHAT_ID=\"$CHAT_ID\"" >> "$CONFIG_FILE" fi source "$CONFIG_FILE" # Test the configuration immediately response=$(curl -s -X POST "https://api.telegram.org/bot$BOT_TOKEN/sendMessage" \ -d "chat_id=$CHAT_ID" \ -d "text=$(translate "Telegram is working correctly!")") if [[ "$response" =~ "ok\":true" ]]; then whiptail --title "$(translate "Success")" \ --msgbox "$(translate "Valid Telegram configuration. Notifications will be sent.")" 10 70 else whiptail --title "$(translate "Error")" \ --msgbox "$(translate "Invalid Telegram configuration. Please verify the token and chat ID.")" 10 70 fi else whiptail --title "$(translate "Error")" \ --msgbox "$(translate "Incomplete Telegram configuration. Please provide both token and chat ID.")" 10 70 fi } # ================================================== # ================================================== # NOTIFICATION CONFIGURATION # ================================================== # Options for the menu options=( "VM and Container|$(t 'VM/Container Start')|vm_start" "VM and Container|$(t 'VM/Container Shutdown')|vm_shutdown" "VM and Container|$(t 'VM/Container Restart')|vm_restart" "VM and Container|$(t 'VM/Container Start Failure')|vm_fail" "System|$(t 'New update available')|update_available" "System|$(t 'Update completed')|update_complete" "System|$(t 'System shutdown')|system_shutdown" "System|$(t 'System problem')|system_problem" "System|$(t 'High system load')|system_load_high" "Storage|$(t 'Disk failure')|disk_fail" "Storage|$(t 'Storage full')|disk_full" "Storage|$(t 'Read/Write issues')|disk_io_error" "Cluster|$(t 'Node disconnected')|node_disconnect" "Cluster|$(t 'Split-brain (quorum conflict)')|split_brain" "Network|$(t 'Network interface down')|network_down" "Network|$(t 'Network saturation')|network_saturation" "Network|$(t 'Firewall issue')|firewall_issue" "Backup and Snapshot|$(t 'Backup completed')|backup_complete" "Backup and Snapshot|$(t 'Backup failed')|backup_fail" "Backup and Snapshot|$(t 'Snapshot completed')|snapshot_complete" "Backup and Snapshot|$(t 'Snapshot failed')|snapshot_fail" "Security|$(t 'Failed authentication attempt')|auth_fail" "Security|$(t 'Automatic IP blocks')|ip_block" "Security|$(t 'User permission change')|user_permission_change" "Resources|$(t 'High CPU usage')|cpu_high" "Resources|$(t 'High RAM usage')|ram_high" "Resources|$(t 'High system temperature')|temp_high" "Resources|$(t 'Low disk space')|low_disk_space" ) # Function to configure notifications configure_notifications() { IFS=$'\n' sorted_options=($(for option in "${options[@]}"; do IFS='|' read -r category description var_name <<< "$option" printf "%s|%s|%s\n" "$category" "$description" "$var_name" done | sort -t'|' -k1,1 -k2,2)) unset IFS declare -A index_to_var index=1 menu_items=() for option in "${sorted_options[@]}"; do IFS='|' read -r category description var_name <<< "$option" index_to_var["$index"]="$var_name" formatted_item="$description" current_length=${#formatted_item} spaces_needed=$((50 - current_length)) for ((j = 0; j < spaces_needed; j++)); do formatted_item+=" " done formatted_item+="$category" state="OFF" [[ "$(eval echo \$$var_name)" -eq 1 ]] && state="ON" menu_items+=("$index" "$formatted_item" "$state") ((index++)) done # whiptail menu selected_indices=$(whiptail --backtitle "ProxMenuX" --title "$(translate "Telegram Notification Configuration")" \ --checklist --separate-output \ "\n$(translate "Select the events you want to receive:")\n" \ 30 100 20 \ "${menu_items[@]}" \ 3>&1 1>&2 2>&3) local result=$? if [[ $result -eq 0 ]]; then cp "$CONFIG_FILE" "${CONFIG_FILE}.bak" 2>/dev/null for var_name in "${index_to_var[@]}"; do sed -i "s/^$var_name=.*/$var_name=0/" "$CONFIG_FILE" done for selected_index in $selected_indices; do var_name="${index_to_var[$selected_index]}" sed -i "s/^$var_name=.*/$var_name=1/" "$CONFIG_FILE" done source "$CONFIG_FILE" whiptail --backtitle "ProxMenuX" --title "$(translate "Success")" \ --msgbox "$(translate "Configuration updated successfully.")" 10 70 fi } # ================================================== # Function to get VM/CT name from its ID get_vm_name() { local vmid="$1" local name="" if [[ -f "/etc/pve/qemu-server/$vmid.conf" ]]; then name=$(grep -i "^name:" "/etc/pve/qemu-server/$vmid.conf" | cut -d ' ' -f2-) elif [[ -f "/etc/pve/lxc/$vmid.conf" ]]; then name=$(grep -i "^hostname:" "/etc/pve/lxc/$vmid.conf" | cut -d ' ' -f2-) fi if [[ -n "$name" ]]; then echo "$name ($vmid)" else echo "$vmid" fi } # ================================================== # ================================================== # NOTIFICATION EVENTS # ================================================== # Function: capture events from journalctl capture_journal_events() { local processed_events_file="$PID_DIR/processed_events" mkdir -p "$PID_DIR" 2>/dev/null if [[ ! -f "$processed_events_file" ]]; then touch "$processed_events_file" fi while true; do # Use tail for Proxmox tasks file tail -F /var/log/pve/tasks/index 2>/dev/null | while read -r line; do event_id=$(echo "$line" | md5sum | cut -d' ' -f1) if grep -q "$event_id" "$processed_events_file" 2>/dev/null; then continue fi echo "$event_id" >> "$processed_events_file" tail -n 1000 "$processed_events_file" > "${processed_events_file}.tmp" 2>/dev/null && mv "${processed_events_file}.tmp" "$processed_events_file" 2>/dev/null local event_processed=false # ===== IMMEDIATE NOTIFICATION EVENTS ===== # VM or CT start failure (CRITICAL) if [[ "$vm_fail" -eq 1 ]] && [[ "$event_processed" = false ]]; then # Detect VM errors if [[ "$line" =~ "Failed to start VM" || "$line" =~ "qmstart" && "$line" =~ "err" || "$line" =~ "qmstart" && "$line" =~ "fail" ]]; then VM_ID=$(echo "$line" | grep -oP '(VM |qmstart:)\K[0-9]+') NAME=$(get_vm_name "$VM_ID") send_notification "🚨 $(translate "CRITICAL: Failed to start VM:") $NAME" event_processed=true # Detect CT (LXC) errors elif [[ "$line" =~ "Failed to start CT" || "$line" =~ "lxc-start" && "$line" =~ "err" || "$line" =~ "lxc-start" && "$line" =~ "fail" ]]; then CT_ID=$(echo "$line" | grep -oP '(CT |lxc-start:)\K[0-9]+') NAME=$(get_vm_name "$CT_ID") send_notification "🚨 $(translate "CRITICAL: Failed to start Container:") $NAME" event_processed=true fi fi # Disk I/O errors (CRITICAL) if [[ "$disk_io_error" -eq 1 ]] && [[ "$event_processed" = false ]]; then if [[ "$line" =~ "I/O error" || "$line" =~ "read error" || "$line" =~ "write error" || "$line" =~ "blk_update_request" || "$line" =~ "buffer I/O error" || "$line" =~ "medium error" || "$line" =~ "sense key: Medium Error" || "$line" =~ "ata.*failed command" || "$line" =~ "SCSI error" ]]; then # Extract device name with improved pattern matching DISK=$(echo "$line" | grep -oE "/dev/[a-zA-Z0-9]+" || echo "$line" | grep -oE "sd[a-z][0-9]*" || echo "$line" | grep -oE "nvme[0-9]+n[0-9]+" || echo "unknown") # Try to extract error type ERROR_TYPE="unknown" if [[ "$line" =~ "read error" ]]; then ERROR_TYPE="read" elif [[ "$line" =~ "write error" ]]; then ERROR_TYPE="write" elif [[ "$line" =~ "medium error" || "$line" =~ "sense key: Medium Error" ]]; then ERROR_TYPE="medium" elif [[ "$line" =~ "timeout" ]]; then ERROR_TYPE="timeout" fi # Try to extract sector information if available SECTOR=$(echo "$line" | grep -oP "sector [0-9]+" || echo "") if [[ -n "$SECTOR" ]]; then SECTOR=" ($SECTOR)" fi # Send notification with enhanced information send_notification "🚨 $(translate "CRITICAL: Disk ${ERROR_TYPE} error on:") $DISK$SECTOR" event_processed=true fi fi # Disk failure (CRITICAL) if [[ "$disk_fail" -eq 1 ]] && [[ "$event_processed" = false ]]; then if [[ "$line" =~ "disk failure" || "$line" =~ "hard drive failure" || "$line" =~ "SMART error" || "$line" =~ "SMART failure" || "$line" =~ "SMART Status BAD" || "$line" =~ "failed SMART" || "$line" =~ "drive failure" || "$line" =~ "bad sectors" || "$line" =~ "sector reallocation" || "$line" =~ "uncorrectable error" || "$line" =~ "media error" || "$line" =~ "not responding" && "$line" =~ "disk" || "$line" =~ "SSD life critical" || "$line" =~ "SSD wear" && "$line" =~ "critical" ]]; then # Extract device name with improved pattern matching DISK=$(echo "$line" | grep -oE "/dev/[a-zA-Z0-9]+" || echo "$line" | grep -oE "sd[a-z][0-9]*" || echo "$line" | grep -oE "nvme[0-9]+n[0-9]+" || echo "$line" | grep -oE "ata[0-9]+" || echo "unknown") # Try to determine failure type FAILURE_TYPE="hardware" if [[ "$line" =~ "SMART" ]]; then FAILURE_TYPE="SMART" # Try to extract SMART attribute if available SMART_ATTR=$(echo "$line" | grep -oP "Attribute \K[^:]*" || echo "$line" | grep -oP "SMART attribute \K[^:]*" || echo "") if [[ -n "$SMART_ATTR" ]]; then SMART_ATTR=" (Attribute: $SMART_ATTR)" fi elif [[ "$line" =~ "bad sectors" || "$line" =~ "sector reallocation" ]]; then FAILURE_TYPE="bad sectors" elif [[ "$line" =~ "SSD" ]]; then FAILURE_TYPE="SSD wear" elif [[ "$line" =~ "not responding" ]]; then FAILURE_TYPE="unresponsive" fi # Send notification with enhanced information send_notification "🚨 $(translate "CRITICAL: Disk failure detected") ($FAILURE_TYPE): $DISK$SMART_ATTR" event_processed=true fi fi # Snapshot failed (CRITICAL) if [[ "$line" =~ "snapshot" ]] && [[ "$snapshot_fail" -eq 1 ]] && [[ "$line" =~ "error" || "$line" =~ "fail" || "$line" =~ "unable to" || "$line" =~ "cannot" ]] && [[ "$event_processed" = false ]]; then # Extract VM/CT ID VM_ID=$(echo "$line" | grep -oP 'TASK \K[0-9]+') if [[ -z "$VM_ID" ]]; then VM_ID=$(echo "$line" | grep -oP 'VM \K[0-9]+') fi if [[ -z "$VM_ID" ]]; then VM_ID=$(echo "$line" | grep -oP 'CT \K[0-9]+') fi # Extract snapshot ID SNAPSHOT_ID=$(echo "$line" | grep -oP 'snapshot \K[a-zA-Z0-9_-]+') if [[ -z "$SNAPSHOT_ID" ]]; then SNAPSHOT_ID=$(echo "$line" | grep -oP 'snap\K[a-zA-Z0-9_-]+') fi # Try to determine error reason ERROR_REASON="" if [[ "$line" =~ "no space" || "$line" =~ "space exhausted" || "$line" =~ "out of space" ]]; then ERROR_REASON=" (No space left)" elif [[ "$line" =~ "timeout" ]]; then ERROR_REASON=" (Operation timed out)" elif [[ "$line" =~ "already exists" ]]; then ERROR_REASON=" (Snapshot already exists)" elif [[ "$line" =~ "locked" || "$line" =~ "lock" ]]; then ERROR_REASON=" (Resource locked)" elif [[ "$line" =~ "permission" ]]; then ERROR_REASON=" (Permission denied)" elif [[ "$line" =~ "quorum" ]]; then ERROR_REASON=" (Quorum error)" fi # Format the notification message if [[ -n "$VM_ID" ]]; then NAME=$(get_vm_name "$VM_ID") if [[ -n "$SNAPSHOT_ID" ]]; then send_notification "🚨 $(translate "CRITICAL: Snapshot failed for:") $NAME (ID: $SNAPSHOT_ID)$ERROR_REASON" else send_notification "🚨 $(translate "CRITICAL: Snapshot failed for:") $NAME$ERROR_REASON" fi else if [[ -n "$SNAPSHOT_ID" ]]; then send_notification "🚨 $(translate "CRITICAL: Snapshot failed") (ID: $SNAPSHOT_ID)$ERROR_REASON" else send_notification "🚨 $(translate "CRITICAL: Snapshot failed")$ERROR_REASON" fi fi event_processed=true fi # Backup failed (CRITICAL) if [[ "$backup_fail" -eq 1 ]] && [[ "$event_processed" = false ]]; then # Expanded pattern matching for backup failures if [[ "$line" =~ "backup" && ("$line" =~ "error" || "$line" =~ "fail" || "$line" =~ "unable to" || "$line" =~ "cannot" || "$line" =~ "abort") ]]; then # Extract VM/CT ID VM_ID=$(echo "$line" | grep -oP 'TASK \K[0-9]+') if [[ -z "$VM_ID" ]]; then VM_ID=$(echo "$line" | grep -oP 'VM \K[0-9]+') fi if [[ -z "$VM_ID" ]]; then VM_ID=$(echo "$line" | grep -oP 'CT \K[0-9]+') fi # Extract backup target BACKUP_TARGET=$(echo "$line" | grep -oP 'to ["\047]?\K[a-zA-Z0-9_-]+') if [[ -z "$BACKUP_TARGET" ]]; then BACKUP_TARGET=$(echo "$line" | grep -oP 'storage ["\047]?\K[a-zA-Z0-9_-]+') fi # Try to determine error reason ERROR_REASON="" if [[ "$line" =~ "no space" || "$line" =~ "space exhausted" || "$line" =~ "out of space" ]]; then ERROR_REASON=" (No space left)" elif [[ "$line" =~ "timeout" ]]; then ERROR_REASON=" (Operation timed out)" elif [[ "$line" =~ "connection" && "$line" =~ "refused" ]]; then ERROR_REASON=" (Connection refused)" elif [[ "$line" =~ "network" ]]; then ERROR_REASON=" (Network error)" elif [[ "$line" =~ "permission" ]]; then ERROR_REASON=" (Permission denied)" elif [[ "$line" =~ "locked" || "$line" =~ "lock" ]]; then ERROR_REASON=" (Resource locked)" elif [[ "$line" =~ "quorum" ]]; then ERROR_REASON=" (Quorum error)" elif [[ "$line" =~ "already running" ]]; then ERROR_REASON=" (Another backup is already running)" fi # Format the notification message if [[ -n "$VM_ID" ]]; then NAME=$(get_vm_name "$VM_ID") if [[ -n "$BACKUP_TARGET" ]]; then send_notification "🚨 $(translate "CRITICAL: Backup failed for:") $NAME (Target: $BACKUP_TARGET)$ERROR_REASON" else send_notification "🚨 $(translate "CRITICAL: Backup failed for:") $NAME$ERROR_REASON" fi else if [[ -n "$BACKUP_TARGET" ]]; then send_notification "🚨 $(translate "CRITICAL: Backup failed") (Target: $BACKUP_TARGET)$ERROR_REASON" else send_notification "🚨 $(translate "CRITICAL: Backup failed")$ERROR_REASON" fi fi event_processed=true fi fi # Failed authentication attempt (CRITICAL) if [[ "$auth_fail" -eq 1 ]] && [[ "$event_processed" = false ]]; then if [[ "$line" =~ "authentication failure" || "$line" =~ "auth fail" || "$line" =~ "login failed" || "$line" =~ "Failed password" || "$line" =~ "Invalid user" || "$line" =~ "failed login" || "$line" =~ "authentication error" || ( "$line" =~ "unauthorized" && "$line" =~ "access" ) ]]; then # Extract username USER=$(echo "$line" | grep -oP 'user=\K[^ ]+') if [[ -z "$USER" ]]; then USER=$(echo "$line" | grep -oP 'user \K[^ ]+') fi if [[ -z "$USER" ]]; then USER=$(echo "$line" | grep -oP 'for user \K[^ ]+') fi if [[ -z "$USER" ]]; then USER=$(echo "$line" | grep -oP 'for invalid user \K[^ ]+') fi if [[ -z "$USER" ]]; then USER=$(echo "$line" | grep -oP 'for \K[^ ]+' | grep -v "invalid") fi if [[ -z "$USER" ]]; then USER="unknown" fi # Extract IP address IP=$(echo "$line" | grep -oP 'rhost=\K[^ ]+') if [[ -z "$IP" ]]; then IP=$(echo "$line" | grep -oP 'from \K[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+') fi if [[ -z "$IP" ]]; then IP=$(echo "$line" | grep -oP 'from \K[0-9a-f:]+') fi if [[ -z "$IP" ]]; then IP=$(echo "$line" | grep -oP 'IP: \K[^ ]+') fi if [[ -z "$IP" ]]; then IP="unknown" fi # Try to determine authentication service SERVICE="system" if [[ "$line" =~ "sshd" ]]; then SERVICE="SSH" elif [[ "$line" =~ "pvedaemon" || "$line" =~ "pveproxy" ]]; then SERVICE="Proxmox Web UI" elif [[ "$line" =~ "nginx" || "$line" =~ "apache" ]]; then SERVICE="Web Server" elif [[ "$line" =~ "smtp" || "$line" =~ "mail" ]]; then SERVICE="Mail" elif [[ "$line" =~ "ftp" ]]; then SERVICE="FTP" fi # Try to extract authentication method if available AUTH_METHOD="" if [[ "$line" =~ "password" ]]; then AUTH_METHOD=" (Password auth)" elif [[ "$line" =~ "publickey" ]]; then AUTH_METHOD=" (Public key auth)" elif [[ "$line" =~ "keyboard-interactive" ]]; then AUTH_METHOD=" (Interactive auth)" elif [[ "$line" =~ "PAM" ]]; then AUTH_METHOD=" (PAM auth)" fi # Count failed attempts from this IP if possible ATTEMPT_COUNT="" if [[ -n "$IP" && "$IP" != "unknown" ]]; then # Use journalctl to count recent failed attempts from this IP if command -v journalctl &>/dev/null; then COUNT=$(journalctl -q --since "1 hour ago" | grep -c "$IP") if [[ $COUNT -gt 1 ]]; then ATTEMPT_COUNT=" ($COUNT attempts in the last hour)" fi fi fi # Send notification with enhanced information send_notification "🚨 $(translate "CRITICAL: Failed authentication attempt:") $USER from $IP - $SERVICE$AUTH_METHOD$ATTEMPT_COUNT" event_processed=true fi fi # Firewall issue (CRITICAL) if [[ "$line" =~ "firewall" && "$firewall_issue" -eq 1 && ( "$line" =~ "error" || "$line" =~ "block" || "$line" =~ "reject" || "$line" =~ "drop" || "$line" =~ "denied" || "$line" =~ "fail" || "$line" =~ "invalid" ) && "$event_processed" = false ]]; then # Try to determine the type of firewall issue ISSUE_TYPE="issue" if [[ "$line" =~ "error" ]]; then ISSUE_TYPE="configuration error" elif [[ "$line" =~ "block" || "$line" =~ "denied" ]]; then ISSUE_TYPE="blocked connection" elif [[ "$line" =~ "reject" ]]; then ISSUE_TYPE="rejected connection" elif [[ "$line" =~ "drop" ]]; then ISSUE_TYPE="dropped packet" elif [[ "$line" =~ "invalid" ]]; then ISSUE_TYPE="invalid rule" fi # Extract source IP SRC_IP=$(echo "$line" | grep -oP 'SRC=\K[0-9.]+') if [[ -z "$SRC_IP" ]]; then SRC_IP=$(echo "$line" | grep -oP 'from \K[0-9.]+') fi if [[ -z "$SRC_IP" ]]; then SRC_IP=$(echo "$line" | grep -oP 'source \K[0-9.]+') fi # Extract destination IP DST_IP=$(echo "$line" | grep -oP 'DST=\K[0-9.]+') if [[ -z "$DST_IP" ]]; then DST_IP=$(echo "$line" | grep -oP 'to \K[0-9.]+') fi if [[ -z "$DST_IP" ]]; then DST_IP=$(echo "$line" | grep -oP 'destination \K[0-9.]+') fi # Try to extract port information if available PORT_INFO="" SRC_PORT=$(echo "$line" | grep -oP 'SPT=\K[0-9]+' || echo "") DST_PORT=$(echo "$line" | grep -oP 'DPT=\K[0-9]+' || echo "") if [[ -n "$SRC_PORT" && -n "$DST_PORT" ]]; then PORT_INFO=" (Port $SRC_PORT → $DST_PORT)" elif [[ -n "$DST_PORT" ]]; then PORT_INFO=" (Port $DST_PORT)" fi # Try to extract protocol if available PROTO=$(echo "$line" | grep -oP 'PROTO=\K[A-Za-z]+' || echo "$line" | grep -oP 'protocol \K[A-Za-z]+' || echo "") if [[ -n "$PROTO" ]]; then PROTO=" $PROTO" fi # Try to extract interface if available IFACE=$(echo "$line" | grep -oP 'IN=\K[^ ]+' || echo "$line" | grep -oP 'OUT=\K[^ ]+' || echo "$line" | grep -oP 'on \K[^ ]+' || echo "") if [[ -n "$IFACE" ]]; then IFACE=" on $IFACE" fi # Format the notification message if [[ -n "$SRC_IP" && -n "$DST_IP" ]]; then send_notification "🚨 $(translate "CRITICAL: Firewall ${ISSUE_TYPE}:") $SRC_IP → $DST_IP$PORT_INFO$PROTO$IFACE" elif [[ -n "$SRC_IP" ]]; then send_notification "🚨 $(translate "CRITICAL: Firewall ${ISSUE_TYPE}:") from $SRC_IP$PORT_INFO$PROTO$IFACE" elif [[ -n "$DST_IP" ]]; then send_notification "🚨 $(translate "CRITICAL: Firewall ${ISSUE_TYPE}:") to $DST_IP$PORT_INFO$PROTO$IFACE" else # Extract a more concise message from the line CONCISE_MSG=$(echo "$line" | sed -E 's/.*firewall[^:]*: ?//i' | cut -c 1-100) send_notification "🚨 $(translate "CRITICAL: Firewall ${ISSUE_TYPE}:") $CONCISE_MSG" fi event_processed=true fi # Network interface recovery handler if [[ "$network_down" -eq 1 ]] && [[ "$event_processed" = false ]]; then if [[ "$line" =~ (eth[0-9]+|eno[0-9]+|enp[0-9]+s[0-9]+|wlan[0-9]+) ]]; then IFACE="${BASH_REMATCH[1]}" # Detect interface going down if [[ "$line" =~ "link down" || "$line" =~ "disconnected" || "$line" =~ "no carrier" || "$line" =~ "failure" ]]; then # Mark interface as down and store the timestamp IFACE_DOWN["$IFACE"]=true IFACE_DOWN_TIME["$IFACE"]="$(date +%s)" fi # Detect interface recovery if [[ "$line" =~ "link up" || "$line" =~ "activated" ]]; then if [[ "${IFACE_DOWN[$IFACE]}" == true ]]; then RESTORE_TIME=$(date +%s) START_TIME=${IFACE_DOWN_TIME[$IFACE]} DURATION=$((RESTORE_TIME - START_TIME)) # Check if this is the default route interface PRIMARY="" if ip route | grep -q "default.*$IFACE"; then PRIMARY=" (PRIMARY INTERFACE)" fi # Send notification after connection is restored send_notification "$(translate '✅ Network connection was lost and has been restored on') $IFACE$PRIMARY. $(translate 'Downtime duration'): ${DURATION}s" # Clean up the interface state unset IFACE_DOWN["$IFACE"] unset IFACE_DOWN_TIME["$IFACE"] event_processed=true fi fi fi fi # Split-brain detected (CRITICAL) if [[ "$split_brain" -eq 1 ]] && [[ "$event_processed" = false ]]; then # Expanded pattern matching for split-brain detection if [[ "$line" =~ "Split-Brain" || "$line" =~ "split brain" || "$line" =~ "split-brain" || ( "$line" =~ "fencing" && "$line" =~ "required" ) || ( "$line" =~ "cluster" && "$line" =~ "partition" ) ]]; then NODES=$(echo "$line" | grep -oP 'nodes: \K[^.]+') if [[ -z "$NODES" ]]; then NODES=$(echo "$line" | grep -oP 'between \K[^.]+') fi if [[ -n "$NODES" ]]; then NODES=" (Affected nodes: $NODES)" fi # Try to extract fence status if available FENCE_INFO="" if [[ "$line" =~ "fencing" ]]; then if [[ "$line" =~ "successful" ]]; then FENCE_INFO=" (Fencing successful)" elif [[ "$line" =~ "failed" ]]; then FENCE_INFO=" (Fencing failed)" else FENCE_INFO=" (Fencing required)" fi fi # Send notification with enhanced information send_notification "🚨 $(translate "CRITICAL: Split-brain detected in cluster")$NODES$FENCE_INFO - $(translate "Manual intervention required!")" event_processed=true fi fi # Node disconnected from cluster (CRITICAL) if [[ "$node_disconnect" -eq 1 ]] && [[ "$event_processed" = false ]]; then # Expanded pattern matching for node disconnection if [[ ( "$line" =~ "quorum" && "$line" =~ "lost" ) || ( "$line" =~ "node" && "$line" =~ "left" ) || ( "$line" =~ "node" && "$line" =~ "offline" ) || ( "$line" =~ "connection" && "$line" =~ "lost" && "$line" =~ "node" ) ]]; then # Extract node name with improved pattern matching NODE=$(echo "$line" | grep -oP 'node \K[^ ,.]+') if [[ -z "$NODE" ]]; then NODE=$(echo "$line" | grep -oP 'Node \K[^ ,.]+') fi if [[ -z "$NODE" ]]; then NODE=$(echo "$line" | grep -oP 'from \K[^ ,.]+') fi if [[ -z "$NODE" ]]; then NODE="unknown" fi # Try to determine if quorum is still valid QUORUM_STATUS="" if [[ "$line" =~ "quorum" ]]; then if [[ "$line" =~ "lost" ]]; then QUORUM_STATUS=" (Quorum lost)" elif [[ "$line" =~ "still" && "$line" =~ "valid" ]]; then QUORUM_STATUS=" (Quorum still valid)" fi fi # Try to extract remaining nodes count if available REMAINING_COUNT=$(echo "$line" | grep -oP 'remaining nodes: \K[0-9]+') if [[ -z "$REMAINING_COUNT" ]]; then REMAINING_COUNT=$(echo "$line" | grep -oP 'nodes left: \K[0-9]+') fi if [[ -n "$REMAINING_COUNT" ]]; then REMAINING=" ($REMAINING_COUNT nodes remaining)" fi # Try to determine if this is expected or unexpected EXPECTED="" if [[ "$line" =~ "shutdown" || "$line" =~ "maintenance" ]]; then EXPECTED=" (Planned)" else EXPECTED=" (Unexpected)" fi # Send notification with enhanced information send_notification "🚨 $(translate "CRITICAL: Node disconnected from cluster:") $NODE$QUORUM_STATUS$REMAINING$EXPECTED" event_processed=true fi fi # System shutdown (NON-CRITICAL - with interval) if [[ "$system_shutdown" -eq 1 ]] && (( current_time - last_shutdown_notification > resource_interval )); then if [[ "$line" =~ "systemd-journald" && "$line" =~ "Journal stopped" ]]; then # No hay razón específica, pero podemos indicar que se detuvo el journal (lo último antes del apagado) send_notification "⚠️ $(translate "System is shutting down")" last_shutdown_notification=$current_time # Log the event logger -t proxmox-notify "System is shutting down (journal stopped)" event_processed=true fi fi # System problem (CRITICAL) if [[ "$system_problem" -eq 1 ]] && [[ "$event_processed" = false ]]; then if [[ "$line" =~ "kernel panic" || "$line" =~ "segfault" || "$line" =~ "Out of memory" || "$line" =~ "BUG:" || "$line" =~ "Call Trace:" || "$line" =~ "Failed to start" || "$line" =~ "Unit .* failed" || "$line" =~ "Service .* exited with" ]]; then # Extract possible service name or component if available COMPONENT=$(echo "$line" | grep -oP 'Failed to start \K[^:]+' || echo "$line" | grep -oP 'Unit \K[^ ]+' || echo "$line" | grep -oP 'Service \K[^ ]+' || echo "unknown") # Format and send the notification send_notification "🚨 $(translate "CRITICAL: System problem detected") ($COMPONENT)" logger -t proxmox-notify "CRITICAL: System problem detected ($COMPONENT)" event_processed=true fi fi # User permission change (NON-CRITICAL - with interval) if [[ "$user_permission_change" -eq 1 ]] && (( current_time - last_user_permission_notification > resource_interval )); then if [[ "$line" =~ "set permissions" || "$line" =~ "user added to group" || "$line" =~ "user removed from group" || "$line" =~ "ACL updated" || "$line" =~ "Role assigned" || "$line" =~ "Changed user permissions" ]]; then # Try to extract username USER=$(echo "$line" | grep -oP 'user \K[^ ]+' || echo "$line" | grep -oP 'for user \K[^ ]+' || echo "$line" | grep -oP 'User \K[^ ]+' || echo "unknown") # Try to detect change type ACTION="Permission change" if [[ "$line" =~ "added to group" ]]; then ACTION="Added to group" elif [[ "$line" =~ "removed from group" ]]; then ACTION="Removed from group" elif [[ "$line" =~ "Role assigned" ]]; then ACTION="Role assigned" elif [[ "$line" =~ "ACL updated" ]]; then ACTION="ACL updated" fi send_notification "🔐 $(translate "User permission changed:") $USER ($ACTION)" logger -t proxmox-notify "User permission changed: $USER ($ACTION)" last_user_permission_notification=$current_time event_processed=true fi fi # Network saturation (NON-CRITICAL - with interval) if [[ "$network_saturation" -eq 1 ]] && (( current_time - last_network_saturation_notification > resource_interval )); then if command -v ip &>/dev/null; then saturated_ifaces="" # Loop over interfaces and look for rx/tx errors/drops while read -r line; do iface=$(echo "$line" | awk -F: '{print $2}' | xargs) stats=$(ip -s link show "$iface" | awk '/RX:|TX:/ {getline; print}') rx_errors=$(echo "$stats" | awk 'NR==1 {print $3}') tx_errors=$(echo "$stats" | awk 'NR==2 {print $3}') rx_dropped=$(echo "$stats" | awk 'NR==1 {print $4}') tx_dropped=$(echo "$stats" | awk 'NR==2 {print $4}') if (( rx_errors > 100 || tx_errors > 100 || rx_dropped > 100 || tx_dropped > 100 )); then saturated_ifaces+="$iface (RX errors: $rx_errors, TX errors: $tx_errors, RX dropped: $rx_dropped, TX dropped: $tx_dropped), " fi done < <(ip -o link show | awk -F': ' '{print $2}') # Clean and notify if [[ -n "$saturated_ifaces" ]]; then saturated_ifaces="${saturated_ifaces%, }" send_notification "⚠️ $(translate "WARNING: Network saturation or errors detected on:") $saturated_ifaces" logger -t proxmox-notify "WARNING: Network saturation on: $saturated_ifaces" last_network_saturation_notification=$current_time fi fi fi # Automatic IP blocks (NON-CRITICAL - with interval) if [[ "$ip_block" -eq 1 ]] && (( current_time - last_ip_block_notification > resource_interval )); then if [[ "$line" =~ "fail2ban" || "$line" =~ "Banned IP" || "$line" =~ "Blocking IP" || "$line" =~ "DROP" && "$line" =~ "SRC=" || "$line" =~ "REJECT" && "$line" =~ "SRC=" ]]; then # Try to extract IP address IP=$(echo "$line" | grep -oP 'SRC=\K[0-9.]+' || echo "$line" | grep -oP 'from \K[0-9.]+' || echo "$line" | grep -oP 'Banned IP \K[0-9.]+' || echo "$line" | grep -oP 'Blocking IP \K[0-9.]+' || echo "unknown") # Detect source (Fail2ban, Firewall, etc.) SOURCE="Firewall" if [[ "$line" =~ "fail2ban" ]]; then SOURCE="Fail2ban" elif [[ "$line" =~ "pve-firewall" ]]; then SOURCE="PVE Firewall" fi send_notification "🔒 $(translate "Automatic IP block detected") ($IP - $SOURCE)" logger -t proxmox-notify "IP block detected: $IP ($SOURCE)" last_ip_block_notification=$current_time event_processed=true fi fi # ===== NON-CRITICAL EVENTS (IMMEDIATE) ===== # VM/CT start (NON-CRITICAL but immediate) if [[ "$vm_start" -eq 1 ]] && [[ "$event_processed" = false ]]; then # VM start detection if [[ "$line" =~ "qmstart" && ! "$line" =~ "err" && ! "$line" =~ "fail" ]]; then VM_ID=$(echo "$line" | grep -oP 'qmstart:\K[0-9]+' || echo "$line" | grep -oP 'VM \K[0-9]+' || echo "") if [[ -n "$VM_ID" ]]; then NAME=$(get_vm_name "$VM_ID") # Try to extract additional information EXTRA_INFO="" # Check if this is a template if [[ "$line" =~ "template" ]]; then EXTRA_INFO=" (Template)" fi # Check if this is a restore or clone operation if [[ "$line" =~ "restore" ]]; then EXTRA_INFO=" (Restored)" elif [[ "$line" =~ "clone" ]]; then EXTRA_INFO=" (Cloned)" fi send_notification "✅ $(translate "VM started successfully:") $NAME$EXTRA_INFO" event_processed=true fi # LXC container start detection elif [[ "$line" =~ "lxc-start" && ! "$line" =~ "err" && ! "$line" =~ "fail" ]] || [[ "$line" =~ "Starting CT" && ! "$line" =~ "err" && ! "$line" =~ "fail" ]]; then CT_ID=$(echo "$line" | grep -oP 'lxc-start:\K[0-9]+' || echo "$line" | grep -oP 'CT \K[0-9]+' || echo "") if [[ -n "$CT_ID" ]]; then NAME=$(get_vm_name "$CT_ID") # Try to extract additional information EXTRA_INFO="" # Check if this is a template if [[ "$line" =~ "template" ]]; then EXTRA_INFO=" (Template)" fi # Check if this is a restore or clone operation if [[ "$line" =~ "restore" ]]; then EXTRA_INFO=" (Restored)" elif [[ "$line" =~ "clone" ]]; then EXTRA_INFO=" (Cloned)" fi send_notification "✅ $(translate "Container started successfully:") $NAME$EXTRA_INFO" event_processed=true fi fi fi # VM/CT shutdown (NON-CRITICAL but immediate) if [[ "$vm_shutdown" -eq 1 ]] && [[ "$event_processed" = false ]]; then # VM shutdown detection if [[ "$line" =~ "qmstop" && ! "$line" =~ "err" && ! "$line" =~ "fail" ]]; then VM_ID=$(echo "$line" | grep -oP 'qmstop:\K[0-9]+' || echo "$line" | grep -oP 'VM \K[0-9]+' || echo "") if [[ -n "$VM_ID" ]]; then NAME=$(get_vm_name "$VM_ID") # Try to determine shutdown type SHUTDOWN_TYPE="" if [[ "$line" =~ "force" || "$line" =~ "kill" ]]; then SHUTDOWN_TYPE=" (Forced)" elif [[ "$line" =~ "suspend" ]]; then SHUTDOWN_TYPE=" (Suspended)" elif [[ "$line" =~ "hibernate" ]]; then SHUTDOWN_TYPE=" (Hibernated)" elif [[ "$line" =~ "timeout" ]]; then SHUTDOWN_TYPE=" (Timeout)" elif [[ "$line" =~ "acpi" ]]; then SHUTDOWN_TYPE=" (ACPI shutdown)" fi send_notification "✅ $(translate "VM stopped successfully:") $NAME$SHUTDOWN_TYPE" event_processed=true fi # LXC container shutdown detection elif [[ "$line" =~ "lxc-stop" && ! "$line" =~ "err" && ! "$line" =~ "fail" ]] || [[ "$line" =~ "Stopping CT" && ! "$line" =~ "err" && ! "$line" =~ "fail" ]]; then CT_ID=$(echo "$line" | grep -oP 'lxc-stop:\K[0-9]+' || echo "$line" | grep -oP 'CT \K[0-9]+' || echo "") if [[ -n "$CT_ID" ]]; then NAME=$(get_vm_name "$CT_ID") # Try to determine shutdown type SHUTDOWN_TYPE="" if [[ "$line" =~ "force" || "$line" =~ "kill" ]]; then SHUTDOWN_TYPE=" (Forced)" elif [[ "$line" =~ "timeout" ]]; then SHUTDOWN_TYPE=" (Timeout)" fi send_notification "✅ $(translate "Container stopped successfully:") $NAME$SHUTDOWN_TYPE" event_processed=true fi fi fi # VM/CT restart (NON-CRITICAL but immediate) if [[ "$vm_restart" -eq 1 ]] && [[ "$event_processed" = false ]]; then # VM restart detection if [[ ("$line" =~ "qmreset" || "$line" =~ "qmreboot") && ! "$line" =~ "err" && ! "$line" =~ "fail" ]]; then VM_ID=$(echo "$line" | grep -oP '(qmreset|qmreboot):\K[0-9]+' || echo "$line" | grep -oP 'VM \K[0-9]+' || echo "") if [[ -n "$VM_ID" ]]; then NAME=$(get_vm_name "$VM_ID") # Try to determine restart type RESTART_TYPE="" if [[ "$line" =~ "qmreset" ]]; then RESTART_TYPE=" (Hard reset)" elif [[ "$line" =~ "force" || "$line" =~ "kill" ]]; then RESTART_TYPE=" (Forced)" elif [[ "$line" =~ "timeout" ]]; then RESTART_TYPE=" (After timeout)" elif [[ "$line" =~ "acpi" ]]; then RESTART_TYPE=" (ACPI restart)" fi send_notification "✅ $(translate "VM restarted successfully:") $NAME$RESTART_TYPE" event_processed=true fi # LXC container restart detection elif [[ "$line" =~ "lxc-restart" || "$line" =~ "Restarting CT" || ("$line" =~ "lxc-stop" && "$line" =~ "lxc-start" && "$line" =~ "restart") ]] && [[ ! "$line" =~ "err" && ! "$line" =~ "fail" ]]; then CT_ID=$(echo "$line" | grep -oP 'lxc-restart:\K[0-9]+' || echo "$line" | grep -oP 'CT \K[0-9]+' || echo "$line" | grep -oP 'lxc-(stop|start):\K[0-9]+' || echo "") if [[ -n "$CT_ID" ]]; then NAME=$(get_vm_name "$CT_ID") # Try to determine restart type RESTART_TYPE="" if [[ "$line" =~ "force" || "$line" =~ "kill" ]]; then RESTART_TYPE=" (Forced)" elif [[ "$line" =~ "timeout" ]]; then RESTART_TYPE=" (After timeout)" fi send_notification "✅ $(translate "Container restarted successfully:") $NAME$RESTART_TYPE" event_processed=true fi fi fi # Snapshot completed (NON-CRITICAL but immediate) if [[ "$line" =~ "snapshot" ]] && [[ "$snapshot_complete" -eq 1 ]] && [[ ! "$line" =~ "error" ]] && [[ "$event_processed" = false ]]; then # Additional pattern matching for completed snapshots if [[ "$line" =~ "complete" || "$line" =~ "finished" || "$line" =~ "success" || ! "$line" =~ "fail" && ! "$line" =~ "unable" ]]; then # Extract VM/CT ID with improved pattern matching VM_ID=$(echo "$line" | grep -oP 'TASK \K[0-9]+') if [[ -z "$VM_ID" ]]; then VM_ID=$(echo "$line" | grep -oP 'VM \K[0-9]+') fi if [[ -z "$VM_ID" ]]; then VM_ID=$(echo "$line" | grep -oP 'CT \K[0-9]+') fi # Try to extract snapshot name/ID if available SNAPSHOT_NAME=$(echo "$line" | grep -oP 'snapshot \K[a-zA-Z0-9_-]+' || echo "$line" | grep -oP 'snap\K[a-zA-Z0-9_-]+' || echo "$line" | grep -oP 'name: \K[a-zA-Z0-9_-]+' || echo "") # Try to extract snapshot size if available SNAPSHOT_SIZE=$(echo "$line" | grep -oP 'size: \K[0-9.]+[KMGT]B' || echo "$line" | grep -oP '[0-9.]+[KMGT]B' || echo "") # Try to extract duration if available DURATION=$(echo "$line" | grep -oP 'duration: \K[0-9.]+s' || echo "$line" | grep -oP 'in \K[0-9.]+s' || echo "$line" | grep -oP 'took \K[0-9.]+s' || echo "") # Format additional information ADDITIONAL_INFO="" if [[ -n "$SNAPSHOT_NAME" ]]; then ADDITIONAL_INFO+=" (Name: $SNAPSHOT_NAME" if [[ -n "$SNAPSHOT_SIZE" ]]; then ADDITIONAL_INFO+=", Size: $SNAPSHOT_SIZE" fi if [[ -n "$DURATION" ]]; then ADDITIONAL_INFO+=", Duration: $DURATION" fi ADDITIONAL_INFO+=")" elif [[ -n "$SNAPSHOT_SIZE" || -n "$DURATION" ]]; then ADDITIONAL_INFO+=" (" if [[ -n "$SNAPSHOT_SIZE" ]]; then ADDITIONAL_INFO+="Size: $SNAPSHOT_SIZE" if [[ -n "$DURATION" ]]; then ADDITIONAL_INFO+=", " fi fi if [[ -n "$DURATION" ]]; then ADDITIONAL_INFO+="Duration: $DURATION" fi ADDITIONAL_INFO+=")" fi # Try to determine snapshot type SNAPSHOT_TYPE="" if [[ "$line" =~ "memory" || "$line" =~ "ram" ]]; then SNAPSHOT_TYPE=" (With RAM)" elif [[ "$line" =~ "disk-only" ]]; then SNAPSHOT_TYPE=" (Disk only)" fi # Format the notification message if [[ -n "$VM_ID" ]]; then NAME=$(get_vm_name "$VM_ID") send_notification "✅ $(translate "Snapshot completed for:") $NAME$ADDITIONAL_INFO$SNAPSHOT_TYPE" else send_notification "✅ $(translate "Snapshot completed")$ADDITIONAL_INFO$SNAPSHOT_TYPE" fi event_processed=true fi fi # Backup completed (NON-CRITICAL but immediate) if [[ "$line" =~ "backup" && "$backup_complete" -eq 1 && ( "$line" =~ "successful" || "$line" =~ "complete" || "$line" =~ "finished" || "$line" =~ "success" ) && ! "$line" =~ "error" && ! "$line" =~ "fail" && "$event_processed" = false ]]; then # Extract VM/CT ID VM_ID=$(echo "$line" | grep -oP 'TASK \K[0-9]+') if [[ -z "$VM_ID" ]]; then VM_ID=$(echo "$line" | grep -oP 'VM \K[0-9]+') fi if [[ -z "$VM_ID" ]]; then VM_ID=$(echo "$line" | grep -oP 'CT \K[0-9]+') fi # Extract backup target BACKUP_TARGET=$(echo "$line" | grep -oP 'to ["\047]?\K[a-zA-Z0-9_-]+') if [[ -z "$BACKUP_TARGET" ]]; then BACKUP_TARGET=$(echo "$line" | grep -oP 'storage ["\047]?\K[a-zA-Z0-9_-]+') fi if [[ -z "$BACKUP_TARGET" ]]; then BACKUP_TARGET=$(echo "$line" | grep -oP 'target ["\047]?\K[a-zA-Z0-9_-]+') fi # Try to extract backup size if available BACKUP_SIZE=$(echo "$line" | grep -oP 'size: \K[0-9.]+[KMGT]B' || echo "$line" | grep -oP '[0-9.]+[KMGT]B' || echo "") # Try to extract duration if available DURATION=$(echo "$line" | grep -oP 'duration: \K[0-9.]+s' || echo "$line" | grep -oP 'in \K[0-9.]+s' || echo "$line" | grep -oP 'took \K[0-9.]+s' || echo "") # Try to extract compression rate if available COMPRESSION=$(echo "$line" | grep -oP 'compression: \K[0-9.]+%' || echo "$line" | grep -oP 'compressed: \K[0-9.]+%' || echo "") # Format additional information ADDITIONAL_INFO="" if [[ -n "$BACKUP_TARGET" || -n "$BACKUP_SIZE" || -n "$DURATION" || -n "$COMPRESSION" ]]; then ADDITIONAL_INFO+=" (" if [[ -n "$BACKUP_TARGET" ]]; then ADDITIONAL_INFO+="Target: $BACKUP_TARGET" if [[ -n "$BACKUP_SIZE" || -n "$DURATION" || -n "$COMPRESSION" ]]; then ADDITIONAL_INFO+=", " fi fi if [[ -n "$BACKUP_SIZE" ]]; then ADDITIONAL_INFO+="Size: $BACKUP_SIZE" if [[ -n "$DURATION" || -n "$COMPRESSION" ]]; then ADDITIONAL_INFO+=", " fi fi if [[ -n "$DURATION" ]]; then ADDITIONAL_INFO+="Duration: $DURATION" if [[ -n "$COMPRESSION" ]]; then ADDITIONAL_INFO+=", " fi fi if [[ -n "$COMPRESSION" ]]; then ADDITIONAL_INFO+="Compression: $COMPRESSION" fi ADDITIONAL_INFO+=")" fi # Try to determine backup type BACKUP_TYPE="" if [[ "$line" =~ "incremental" ]]; then BACKUP_TYPE=" (Incremental)" elif [[ "$line" =~ "differential" ]]; then BACKUP_TYPE=" (Differential)" elif [[ "$line" =~ "full" ]]; then BACKUP_TYPE=" (Full)" fi # Format the notification message if [[ -n "$VM_ID" ]]; then NAME=$(get_vm_name "$VM_ID") send_notification "✅ $(translate "Backup completed for:") $NAME$ADDITIONAL_INFO$BACKUP_TYPE" else send_notification "✅ $(translate "Backup completed")$ADDITIONAL_INFO$BACKUP_TYPE" fi event_processed=true fi # System update completed (NON-CRITICAL but immediate) if [[ "$update_complete" -eq 1 ]] && [[ "$event_processed" = false ]]; then # Match various patterns that indicate a completed update if [[ "$line" =~ "update" && ("$line" =~ "complete" || "$line" =~ "finished" || "$line" =~ "done" || "$line" =~ "success") && ! "$line" =~ "error" && ! "$line" =~ "fail" && ! "$line" =~ "unable" ]]; then # Try to determine what was updated update_type="system" if [[ "$line" =~ "proxmox" || "$line" =~ "pve" ]]; then update_type="Proxmox VE" elif [[ "$line" =~ "kernel" ]]; then update_type="kernel" elif [[ "$line" =~ "package" ]]; then update_type="package" fi # Try to extract version information if available version_info="" if [[ "$line" =~ "version" ]]; then version=$(echo "$line" | grep -oP 'version \K[0-9.]+' || echo "$line" | grep -oP 'to \K[0-9.]+' || echo "") if [[ -n "$version" ]]; then version_info=" ($(translate "version") $version)" fi fi # Try to extract package count if available package_count="" if [[ "$line" =~ "package" ]]; then count=$(echo "$line" | grep -oP '([0-9]+) package' || echo "") if [[ -n "$count" ]]; then package_count=" ($count $(translate "packages"))" fi fi # Try to get a list of updated packages if available package_list="" if [[ -f /var/log/apt/history.log ]]; then # Get the most recent upgrade entry recent_upgrade=$(tac /var/log/apt/history.log | grep -m 1 -A 20 "Upgrade:" | grep -v "End-Date:" | grep "Upgrade:") if [[ -n "$recent_upgrade" ]]; then # Extract package names and versions packages=$(echo "$recent_upgrade" | grep -oP '[a-zA-Z0-9.-]+:[a-zA-Z0-9]+ $$[^)]+$$' | head -n 5) if [[ -n "$packages" ]]; then package_list=" $(translate "Updated packages:") $(echo "$packages" | tr '\n' ', ' | sed 's/,$//')" # If there are more packages, indicate this total_packages=$(echo "$recent_upgrade" | grep -oP '[a-zA-Z0-9.-]+:[a-zA-Z0-9]+ $$[^)]+$$' | wc -l) if [[ $total_packages -gt 5 ]]; then package_list="$package_list, ... ($(translate "and") $((total_packages-5)) $(translate "more"))" fi fi fi fi # Check if a reboot is required reboot_required="" if [[ -f /var/run/reboot-required ]]; then reboot_required=" ⚠️ $(translate "System restart required to complete the update")" fi # Format the notification message send_notification "✅ $(translate "${update_type} update completed")${version_info}${package_count}${package_list}${reboot_required}" event_processed=true # Log the event logger -t proxmox-notify "${update_type} update completed" fi fi done # Si llegamos aquí, es porque tail -F terminó inesperadamente sleep 5 done } # Function: capture direct system events capture_direct_events() { # Variables to control notification frequency local last_load_notification=0 local last_temp_notification=0 local last_disk_space_notification=0 local last_cpu_notification=0 local last_ram_notification=0 local last_update_notification=0 local resource_interval=900 # 15 minutes for resources local update_interval=86400 # 24 hours for updates local disk_full_detected=false while true; do current_time=$(date +%s) # ===== CRITICAL IMMEDIATE NOTIFICATION EVENTS ===== # Disk full (CRITICAL - immediate) if [[ "$disk_full" -eq 1 ]]; then # Check for disks that are completely full (100%) full_disks=$(df -h | awk '$5 == "100%" {print $1 " (100% full)"}') # Check for disks that are nearly full (>=95%) nearly_full_disks=$(df -h | awk '$5 >= "95%" && $5 < "100%" {print $1 " (" $5 " full)"}') # Handle completely full disks if [[ -n "$full_disks" && "$disk_full_detected" = false ]]; then # Format the output for better readability formatted_full_disks=$(echo "$full_disks" | tr '\n' ', ' | sed 's/,$//' | sed 's/,/, /g') send_notification "🚨 $(translate "CRITICAL: Storage completely full:") $formatted_full_disks" disk_full_detected=true # Log the event logger -t proxmox-notify "CRITICAL: Storage completely full: $formatted_full_disks" elif [[ -z "$full_disks" ]]; then disk_full_detected=false fi # Handle nearly full disks (separate notification) if [[ -n "$nearly_full_disks" && "$disk_nearly_full_detected" = false ]]; then # Format the output for better readability formatted_nearly_full_disks=$(echo "$nearly_full_disks" | tr '\n' ', ' | sed 's/,$//' | sed 's/,/, /g') send_notification "⚠️ $(translate "WARNING: Storage nearly full:") $formatted_nearly_full_disks" disk_nearly_full_detected=true # Log the event logger -t proxmox-notify "WARNING: Storage nearly full: $formatted_nearly_full_disks" elif [[ -z "$nearly_full_disks" ]]; then disk_nearly_full_detected=false fi # Check for inode usage (sometimes disks can be full of inodes but not space) full_inodes="" while read -r filesystem inodes_used inodes_total iuse_percent mounted_on; do # Skip if the line doesn't have a valid percentage if ! [[ "$iuse_percent" =~ ^[0-9]+%$ ]]; then continue fi # Extract percentage number without the % sign percent_num=${iuse_percent/\%/} # Skip if percentage is less than 95 if [[ $percent_num -lt 95 ]]; then continue fi # Skip certain Proxmox-specific filesystems that normally show high inode usage # but don't represent a real problem if [[ "$filesystem" =~ ^/dev/mapper/pve- || "$filesystem" =~ ^/dev/pve/ || "$mounted_on" =~ ^/var/lib/vz/root/ || "$mounted_on" =~ ^/etc/pve/ || "$mounted_on" == "/var/lib/vz" && "$percent_num" -lt 98 ]]; then continue fi # Skip tmpfs and devtmpfs filesystems if [[ "$filesystem" == "tmpfs" || "$filesystem" == "devtmpfs" ]]; then continue fi # Skip if the filesystem has very few total inodes (less than 1000) # This helps avoid alerts on small or special filesystems if [[ $inodes_total -lt 1000 ]]; then continue fi # Get a more user-friendly name for the filesystem fs_name="$filesystem" if [[ "$mounted_on" != "/" ]]; then fs_name="$mounted_on ($filesystem)" fi # Add to our list of filesystems with high inode usage full_inodes+="$fs_name ($iuse_percent inodos usados, $inodes_used/$inodes_total), " done < <(df -i | grep -v "Filesystem" | awk '{print $1, $3, $2, $5, $6}') # Remove trailing comma and space if any full_inodes=${full_inodes%, } if [[ -n "$full_inodes" && "$inode_full_detected" = false ]]; then send_notification "⚠️ $(translate "WARNING: Inode usage critical:") $full_inodes" inode_full_detected=true # Log the event logger -t proxmox-notify "WARNING: Inode usage critical: $full_inodes" elif [[ -z "$full_inodes" ]]; then inode_full_detected=false fi fi # ===== NON-CRITICAL EVENTS WITH INTERVAL ===== # High system load (NON-CRITICAL - with interval) if [[ "$system_load_high" -eq 1 ]]; then # Get current load averages (1, 5, 15 minutes) load_1=$(awk '{print $1}' /proc/loadavg) load_5=$(awk '{print $2}' /proc/loadavg) load_15=$(awk '{print $3}' /proc/loadavg) # Get number of CPU cores if [[ -f /proc/cpuinfo ]]; then cpu_cores=$(grep -c "^processor" /proc/cpuinfo) else # Default to 1 if we can't determine cpu_cores=1 fi # Calculate thresholds based on number of cores warning_threshold=$(echo "$cpu_cores * 0.8" | bc -l) critical_threshold=$(echo "$cpu_cores * 1.5" | bc -l) # Format load averages for display load_info="1m: $load_1, 5m: $load_5, 15m: $load_15" # Check if load exceeds critical threshold if (( $(echo "$load_1 > $critical_threshold" | bc -l) )) && (( current_time - last_load_notification > resource_interval )); then # Get top processes consuming CPU if command -v top &>/dev/null; then top_processes=$(top -b -n 1 | head -n 12 | tail -n 5 | awk '{print $NF " (" $9 "% CPU)"}' | tr '\n' ', ' | sed 's/,$//') process_info=" $(translate "Top processes:") $top_processes" else process_info="" fi # Get memory usage if [[ -f /proc/meminfo ]]; then mem_total=$(grep "MemTotal" /proc/meminfo | awk '{print $2}') mem_available=$(grep "MemAvailable" /proc/meminfo | awk '{print $2}') mem_used_percent=$(echo "scale=1; 100 - ($mem_available * 100 / $mem_total)" | bc -l) memory_info=" $(translate "Memory usage:") ${mem_used_percent}%" else memory_info="" fi send_notification "🚨 $(translate "CRITICAL: Extremely high system load:") $load_info ($(translate "on") $cpu_cores $(translate "cores"))$memory_info$process_info" last_load_notification=$current_time # Log the event logger -t proxmox-notify "CRITICAL: Extremely high system load: $load_info" # Check if load exceeds warning threshold elif (( $(echo "$load_1 > $warning_threshold" | bc -l) )) && (( current_time - last_load_notification > resource_interval )); then # Get memory usage if [[ -f /proc/meminfo ]]; then mem_total=$(grep "MemTotal" /proc/meminfo | awk '{print $2}') mem_available=$(grep "MemAvailable" /proc/meminfo | awk '{print $2}') mem_used_percent=$(echo "scale=1; 100 - ($mem_available * 100 / $mem_total)" | bc -l) memory_info=" $(translate "Memory usage:") ${mem_used_percent}%" else memory_info="" fi send_notification "⚠️ $(translate "WARNING: High system load:") $load_info ($(translate "on") $cpu_cores $(translate "cores"))$memory_info" last_load_notification=$current_time # Log the event logger -t proxmox-notify "WARNING: High system load: $load_info" fi fi # Available updates (NON-CRITICAL - with daily interval) if [[ "$update_available" -eq 1 ]] && (( current_time - last_update_notification > update_interval )); then # Update package lists quietly apt-get update -qq &>/dev/null # Count total upgradable packages updates=$(apt list --upgradable 2>/dev/null | grep -v "Listing..." | wc -l) # Check for security updates specifically security_updates=$(apt list --upgradable 2>/dev/null | grep -i security | wc -l) # Check for Proxmox VE updates specifically proxmox_updates=$(apt list --upgradable 2>/dev/null | grep -E "^(proxmox-ve|pve-manager|pve-kernel|pve-container|pve-firewall|pve-ha-manager|pve-docs|pve-qemu-kvm|pve-storage|pve-cluster|pve-gui|pve-headers|pve-firmware|pve-zsync|pve-guest-common)" | wc -l) # Get Proxmox version information current_pve_version=$(pveversion -v 2>/dev/null | grep -oP "pve-manager/\K[0-9]+\.[0-9]+" || echo "unknown") # Check if there's a new major Proxmox version available new_pve_version="" if [[ $proxmox_updates -gt 0 ]]; then new_version_check=$(apt list --upgradable 2>/dev/null | grep "^pve-manager/" | grep -oP "pve-manager/\K[0-9]+\.[0-9]+" || echo "") if [[ -n "$new_version_check" && "$new_version_check" != "$current_pve_version" ]]; then new_pve_version="$new_version_check" fi fi # Get list of specific packages that have updates if [[ $updates -gt 0 ]]; then # Get a list of all upgradable packages (limited to 10 to avoid too long messages) package_list=$(apt list --upgradable 2>/dev/null | grep -v "Listing..." | head -n 10 | awk -F/ '{print $1}' | tr '\n' ', ' | sed 's/,$//') # If there are more than 10 packages, indicate this if [[ $updates -gt 10 ]]; then package_list="$package_list, ... ($(translate "and") $((updates-10)) $(translate "more"))" fi # Format the notification message update_msg="ℹ️ $(translate "Updates available:") $updates" if [[ $security_updates -gt 0 ]]; then update_msg="$update_msg ($(translate "including") $security_updates $(translate "security updates"))" fi # If there's a new Proxmox version, highlight it if [[ -n "$new_pve_version" ]]; then update_msg="🔄 $(translate "NEW PROXMOX VERSION AVAILABLE:") $new_pve_version ($(translate "current:") $current_pve_version) $update_msg" elif [[ $proxmox_updates -gt 0 ]]; then update_msg="🔄 $(translate "Proxmox updates available") ($proxmox_updates $(translate "packages")) $update_msg" fi send_notification "$update_msg" last_update_notification=$current_time # Log the event logger -t proxmox-notify "Updates available: $updates packages" fi fi # Low disk space (NON-CRITICAL - with interval) if [[ "$low_disk_space" -eq 1 ]] && (( current_time - last_disk_space_notification > resource_interval )); then # Check partitions with critical space (95-99% usage) critical_space=$(df -h | awk '$5 ~ /9[5-9]%/ && $5 != "100%" {print $1 " (" $5 " full, " $4 " free)"}') # Check partitions with warning space (90-94% usage) warning_space=$(df -h | awk '$5 ~ /9[0-4]%/ {print $1 " (" $5 " full, " $4 " free)"}') # Check partitions with attention space (85-89% usage) attention_space=$(df -h | awk '$5 ~ /8[5-9]%/ {print $1 " (" $5 " full, " $4 " free)"}') # Format messages for better readability if [[ -n "$critical_space" ]]; then critical_space=$(echo "$critical_space" | tr '\n' ', ' | sed 's/,$//' | sed 's/,/, /g') fi if [[ -n "$warning_space" ]]; then warning_space=$(echo "$warning_space" | tr '\n' ', ' | sed 's/,$//' | sed 's/,/, /g') fi if [[ -n "$attention_space" ]]; then attention_space=$(echo "$attention_space" | tr '\n' ', ' | sed 's/,$//' | sed 's/,/, /g') fi # Build notification message disk_space_msg="" if [[ -n "$critical_space" ]]; then disk_space_msg+="🚨 $(translate "CRITICAL: Very low disk space:") $critical_space" fi if [[ -n "$warning_space" ]]; then if [[ -n "$disk_space_msg" ]]; then disk_space_msg+=" " fi disk_space_msg+="⚠️ $(translate "WARNING: Low disk space:") $warning_space" fi if [[ -n "$attention_space" && -z "$critical_space" && -z "$warning_space" ]]; then # Only show attention level if no higher alerts are present disk_space_msg+="ℹ️ $(translate "ATTENTION: Disk space getting low:") $attention_space" fi # Send notification if any space issues were detected if [[ -n "$disk_space_msg" ]]; then send_notification "$disk_space_msg" last_disk_space_notification=$current_time # Log the event logger -t proxmox-notify "Low disk space detected" # Suggest cleanup options for Proxmox if [[ -d /var/lib/vz/dump || -d /var/lib/vz/template ]]; then cleanup_msg="$(translate "TIP: Consider cleaning up old backups with:") 'rm -f /var/lib/vz/dump/vzdump-*.tar' $(translate "or old templates with:") 'rm -f /var/lib/vz/template/cache/*.tar.gz'" send_notification "$cleanup_msg" fi fi fi # High CPU usage (NON-CRITICAL - with interval) if [[ "$cpu_high" -eq 1 ]] && (( current_time - last_cpu_notification > resource_interval )); then # Get number of CPU cores if [[ -f /proc/cpuinfo ]]; then cpu_cores=$(grep -c "^processor" /proc/cpuinfo) else # Default to 1 if we can't determine cpu_cores=1 fi # Use mpstat if available, otherwise use top if command -v mpstat &>/dev/null; then cpu_usage=$(mpstat 1 1 | awk '/Average:/ {print 100 - $NF}') else cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2 + $4}') fi # Round to one decimal place cpu_usage=$(printf "%.1f" $cpu_usage) # Get CPU temperature if available cpu_temp="" if command -v sensors &>/dev/null; then # Try to get CPU temperature from sensors cpu_temp=$(sensors | grep -i "core\|temp" | grep -oP '\+\K[0-9.]+°C' | sort -nr | head -n1) elif [[ -f /sys/class/thermal/thermal_zone0/temp ]]; then # Alternative method using sysfs cpu_temp=$(echo "scale=1; $(cat /sys/class/thermal/thermal_zone0/temp) / 1000" | bc -l) cpu_temp="${cpu_temp}°C" fi # Add temperature info if available temp_info="" if [[ -n "$cpu_temp" ]]; then temp_info=" ($(translate "Temperature:") $cpu_temp)" fi # Get top CPU consuming processes process_info="" if command -v top &>/dev/null; then top_processes=$(top -bn1 -o %CPU | head -n 12 | tail -n 5 | awk '{print $NF " (" $9 "%)"}' | tr '\n' ', ' | sed 's/,$//') process_info=" $(translate "Top processes:") $top_processes" fi # Check for critical CPU usage (>95%) if (( $(echo "$cpu_usage > 95" | bc -l) )); then send_notification "🚨 $(translate "CRITICAL: Very high CPU usage:") ${cpu_usage}% ($(translate "on") $cpu_cores $(translate "cores"))${temp_info}${process_info}" last_cpu_notification=$current_time # Log the event logger -t proxmox-notify "CRITICAL: Very high CPU usage: ${cpu_usage}%" # Check for high CPU usage (>85%) elif (( $(echo "$cpu_usage > 85" | bc -l) )); then send_notification "⚠️ $(translate "WARNING: High CPU usage:") ${cpu_usage}% ($(translate "on") $cpu_cores $(translate "cores"))${temp_info}${process_info}" last_cpu_notification=$current_time # Log the event logger -t proxmox-notify "WARNING: High CPU usage: ${cpu_usage}%" fi # Check for sustained moderate CPU usage (>70% for extended period) # This requires tracking previous readings if [[ -z "$cpu_usage_history" ]]; then cpu_usage_history="$cpu_usage" else cpu_usage_history="$cpu_usage_history,$cpu_usage" # Keep only the last 5 readings cpu_usage_history=$(echo "$cpu_usage_history" | awk -F, '{for(i=NF-4>1?NF-4:1; i<=NF; i++) printf("%s%s", $i, i==NF?"":",") }') # Calculate average of last readings cpu_usage_avg=$(echo "$cpu_usage_history" | awk -F, '{sum=0; for(i=1; i<=NF; i++) sum+=$i; print sum/NF}') # If average is >70% and we haven't sent a notification recently if (( $(echo "$cpu_usage_avg > 70" | bc -l) )) && (( current_time - last_cpu_sustained_notification > resource_interval * 3 )); then send_notification "ℹ️ $(translate "ATTENTION: Sustained CPU usage:") ${cpu_usage_avg}% $(translate "average over time") ($(translate "on") $cpu_cores $(translate "cores"))${temp_info}" last_cpu_sustained_notification=$current_time # Log the event logger -t proxmox-notify "ATTENTION: Sustained CPU usage: ${cpu_usage_avg}%" fi fi fi # High RAM usage (NON-CRITICAL - with interval) if [[ "$ram_high" -eq 1 ]] && (( current_time - last_ram_notification > resource_interval )); then # Get detailed memory information total_ram=$(free -m | awk '/Mem:/ {print $2}') used_ram=$(free -m | awk '/Mem:/ {print $3}') free_ram=$(free -m | awk '/Mem:/ {print $4}') shared_ram=$(free -m | awk '/Mem:/ {print $5}') cache_ram=$(free -m | awk '/Mem:/ {print $6}') available_ram=$(free -m | awk '/Mem:/ {print $7}') # Calculate percentages ram_usage=$(echo "scale=1; ($total_ram - $available_ram) * 100 / $total_ram" | bc -l) ram_usage_no_cache=$(echo "scale=1; ($used_ram - $cache_ram) * 100 / $total_ram" | bc -l) # Get swap information total_swap=$(free -m | awk '/Swap:/ {print $2}') used_swap=$(free -m | awk '/Swap:/ {print $3}') # Calculate swap percentage if swap exists swap_info="" if [[ $total_swap -gt 0 ]]; then swap_percent=$(echo "scale=1; $used_swap * 100 / $total_swap" | bc -l) swap_info=", $(translate "Swap:") ${swap_percent}% (${used_swap}MB/${total_swap}MB)" fi # Format memory values for display ram_info="${ram_usage}% (${used_ram}MB/${total_ram}MB)" ram_info_detailed="Used: ${used_ram}MB, Free: ${free_ram}MB, Cache: ${cache_ram}MB, Available: ${available_ram}MB" # Get top memory consuming processes process_info="" if command -v ps &>/dev/null; then top_processes=$(ps aux --sort=-%mem | head -n 6 | tail -n 5 | awk '{print $11 " (" int($4) "%)"}' | tr '\n' ', ' | sed 's/,$//') process_info=" $(translate "Top processes:") $top_processes" fi # Check for critical RAM usage (>95%) if (( $(echo "$ram_usage > 95" | bc -l) )); then send_notification "🚨 $(translate "CRITICAL: Very high RAM usage:") ${ram_info}${swap_info} ${ram_info_detailed}${process_info}" last_ram_notification=$current_time # Log the event logger -t proxmox-notify "CRITICAL: Very high RAM usage: ${ram_usage}%" # Check for high RAM usage (>85%) elif (( $(echo "$ram_usage > 85" | bc -l) )); then send_notification "⚠️ $(translate "WARNING: High RAM usage:") ${ram_info}${swap_info} ${ram_info_detailed}${process_info}" last_ram_notification=$current_time # Log the event logger -t proxmox-notify "WARNING: High RAM usage: ${ram_usage}%" # Check for high RAM usage excluding cache (>80%) # This is important because Linux uses free RAM for cache, but can free it when needed elif (( $(echo "$ram_usage_no_cache > 80" | bc -l) )); then send_notification "ℹ️ $(translate "ATTENTION: High RAM usage (excluding cache):") ${ram_usage_no_cache}%${swap_info} ${ram_info_detailed}${process_info}" last_ram_notification=$current_time # Log the event logger -t proxmox-notify "ATTENTION: High RAM usage (excluding cache): ${ram_usage_no_cache}%" fi # Check for high swap usage if swap exists and is being used if [[ $total_swap -gt 0 && $used_swap -gt 0 ]]; then # Only alert on high swap if we haven't already alerted on RAM if (( $(echo "$swap_percent > 50" | bc -l) )) && (( $(echo "$ram_usage <= 85" | bc -l) )) && (( current_time - last_swap_notification > resource_interval )); then send_notification "⚠️ $(translate "WARNING: High swap usage:") ${swap_percent}% (${used_swap}MB/${total_swap}MB) ${ram_info_detailed}${process_info}" last_swap_notification=$current_time # Log the event logger -t proxmox-notify "WARNING: High swap usage: ${swap_percent}%" fi fi fi # High temperature (NON-CRITICAL - with interval) if [[ "$temp_high" -eq 1 ]] && (( current_time - last_temp_notification > resource_interval )); then # Initialize variables temp_detected=false max_temp=0 temp_sources="" # Method 1: Use 'sensors' command if available if command -v sensors &>/dev/null; then # Update sensors database if needed if [[ ! -f /var/run/proxmox-notify-sensors-updated ]]; then sensors-detect --auto &>/dev/null || true touch /var/run/proxmox-notify-sensors-updated fi # Try to get CPU temperature from various patterns cpu_temp=$(sensors | grep -E 'Package id 0:|Core [0-9]+:|CPU:|Tdie:|Tctl:' | grep -oP '\+\K[0-9.]+°C|[0-9.]+°C' | sed 's/°C//' | sort -nr | head -n1) if [[ -n "$cpu_temp" && "$cpu_temp" != "0" ]]; then temp_detected=true if (( $(echo "$cpu_temp > $max_temp" | bc -l) )); then max_temp=$cpu_temp temp_sources="CPU" fi fi # Try to get motherboard/system temperature mb_temp=$(sensors | grep -E 'MB Temperature|System Temp|Board Temp|Motherboard' | grep -oP '\+\K[0-9.]+°C|[0-9.]+°C' | sed 's/°C//' | sort -nr | head -n1) if [[ -n "$mb_temp" && "$mb_temp" != "0" ]]; then temp_detected=true if (( $(echo "$mb_temp > $max_temp" | bc -l) )); then max_temp=$mb_temp temp_sources="Motherboard" elif (( $(echo "$mb_temp == $max_temp" | bc -l) )); then temp_sources="$temp_sources, Motherboard" fi fi # Try to get GPU temperature if available gpu_temp=$(sensors | grep -E 'GPU|VGA' | grep -oP '\+\K[0-9.]+°C|[0-9.]+°C' | sed 's/°C//' | sort -nr | head -n1) if [[ -n "$gpu_temp" && "$gpu_temp" != "0" ]]; then temp_detected=true if (( $(echo "$gpu_temp > $max_temp" | bc -l) )); then max_temp=$gpu_temp temp_sources="GPU" elif (( $(echo "$gpu_temp == $max_temp" | bc -l) )); then temp_sources="$temp_sources, GPU" fi fi # Try to get disk temperature if available disk_temp=$(sensors | grep -E 'Drive Temp|Disk Temp|Storage Temp' | grep -oP '\+\K[0-9.]+°C|[0-9.]+°C' | sed 's/°C//' | sort -nr | head -n1) if [[ -n "$disk_temp" && "$disk_temp" != "0" ]]; then temp_detected=true if (( $(echo "$disk_temp > $max_temp" | bc -l) )); then max_temp=$disk_temp temp_sources="Disk" elif (( $(echo "$disk_temp == $max_temp" | bc -l) )); then temp_sources="$temp_sources, Disk" fi fi fi # Method 2: Use sysfs thermal zones if sensors not available or no temp detected if ! $temp_detected && [[ -d /sys/class/thermal ]]; then for zone in /sys/class/thermal/thermal_zone*/temp; do if [[ -f "$zone" ]]; then zone_temp=$(echo "scale=1; $(cat "$zone") / 1000" | bc -l) if [[ -n "$zone_temp" && "$zone_temp" != "0" ]]; then temp_detected=true if (( $(echo "$zone_temp > $max_temp" | bc -l) )); then max_temp=$zone_temp # Try to get zone type zone_dir=$(dirname "$zone") if [[ -f "$zone_dir/type" ]]; then zone_type=$(cat "$zone_dir/type") temp_sources="$zone_type" else temp_sources="Thermal Zone" fi fi fi fi done fi # Method 3: Use ipmitool if available and no temp detected yet if ! $temp_detected && command -v ipmitool &>/dev/null; then ipmi_temp=$(ipmitool sdr type temperature 2>/dev/null | grep -i -E 'CPU|System|Ambient|Inlet|Exhaust' | head -1 | awk '{print $4}') if [[ -n "$ipmi_temp" && "$ipmi_temp" != "0" ]]; then temp_detected=true max_temp=$ipmi_temp temp_sources="IPMI" fi fi # Method 4: Use hddtemp for disk temperatures if available if command -v hddtemp &>/dev/null; then for disk in /dev/sd[a-z]; do if [[ -b "$disk" ]]; then disk_temp=$(hddtemp "$disk" 2>/dev/null | grep -oP '[0-9.]+°C' | sed 's/°C//') if [[ -n "$disk_temp" && "$disk_temp" != "0" ]]; then temp_detected=true if (( $(echo "$disk_temp > $max_temp" | bc -l) )); then max_temp=$disk_temp disk_name=$(basename "$disk") temp_sources="Disk $disk_name" elif (( $(echo "$disk_temp == $max_temp" | bc -l) )); then disk_name=$(basename "$disk") temp_sources="$temp_sources, Disk $disk_name" fi fi fi done fi # If we detected a temperature, check against thresholds if $temp_detected && [[ -n "$max_temp" && "$max_temp" != "0" ]]; then # Critical temperature (>90°C) if (( $(echo "$max_temp > 90" | bc -l) )); then send_notification "🚨 $(translate "CRITICAL: Dangerously high temperature:") ${max_temp}°C (${temp_sources})" last_temp_notification=$current_time # Log the event logger -t proxmox-notify "CRITICAL: Dangerously high temperature: ${max_temp}°C (${temp_sources})" # High temperature (>80°C) elif (( $(echo "$max_temp > 80" | bc -l) )); then send_notification "⚠️ $(translate "WARNING: High temperature:") ${max_temp}°C (${temp_sources})" last_temp_notification=$current_time # Log the event logger -t proxmox-notify "WARNING: High temperature: ${max_temp}°C (${temp_sources})" # Elevated temperature (>70°C) elif (( $(echo "$max_temp > 70" | bc -l) )); then send_notification "ℹ️ $(translate "ATTENTION: Elevated temperature:") ${max_temp}°C (${temp_sources})" last_temp_notification=$current_time # Log the event logger -t proxmox-notify "ATTENTION: Elevated temperature: ${max_temp}°C (${temp_sources})" fi fi fi # Pause between checks sleep 30 done } # Function to start the notification service start_notification_service() { if [[ ! -f /etc/systemd/system/proxmox-telegram.service ]]; then install_systemd_service fi if systemctl is-active --quiet proxmox-telegram.service; then whiptail --title "$(translate "Information")" \ --msgbox "$(translate "The notification service is already running.")" 10 70 else systemctl start proxmox-telegram.service if systemctl is-active --quiet proxmox-telegram.service; then whiptail --title "$(translate "Started")" \ --msgbox "$(translate "The service has been started successfully.")" 10 70 else whiptail --title "$(translate "Error")" \ --msgbox "$(translate "Could not start the notification service.")" 10 70 fi fi } # Function to stop the service stop_notification_service() { if [[ -f /etc/systemd/system/proxmox-telegram.service ]]; then if systemctl is-active --quiet proxmox-telegram.service; then systemctl stop proxmox-telegram.service sleep 2 fi if ! systemctl is-active --quiet proxmox-telegram.service; then whiptail --title "$(translate "Stopped")" \ --msgbox "$(translate "The service has been stopped successfully.")" 10 70 else whiptail --title "$(translate "Error")" \ --msgbox "$(translate "Could not stop the notification service.")" 10 70 fi else whiptail --title "$(translate "Information")" \ --msgbox "$(translate "The notification service is not installed yet.")" 10 70 fi } # Function to check service status check_service_status() { clear if [[ -f /etc/systemd/system/proxmox-telegram.service ]]; then systemctl status proxmox-telegram.service else echo "$(translate "The service is not installed.")" fi echo msg_success "$(translate "Press Enter to return to the menu...")" read -r } # Function to remove the systemd service remove_systemd_service() { if [[ -f /etc/systemd/system/proxmox-telegram.service ]]; then if systemctl is-active --quiet proxmox-telegram.service; then systemctl stop proxmox-telegram.service fi systemctl disable proxmox-telegram.service rm -f /etc/systemd/system/proxmox-telegram.service systemctl daemon-reexec whiptail --title "$(translate "Removed")" \ --msgbox "$(translate "The service has been removed successfully. You can reinstall it from the menu if desired.")" 10 70 else whiptail --title "$(translate "Information")" \ --msgbox "$(translate "The service does not exist, nothing to remove.")" 10 70 fi } # Functions required by systemd start_silent() { mkdir -p "$PID_DIR" capture_journal_events > /dev/null 2>&1 & echo $! > "$PID_DIR/journal.pid" journal_pid=$! capture_direct_events > /dev/null 2>&1 & echo $! > "$PID_DIR/direct.pid" direct_pid=$! echo $$ > "$PID_DIR/service.pid" # Wait for both processes to finish (keeps systemd service alive) wait $journal_pid wait $direct_pid } stop_silent() { send_notification "⚠️ $(translate "System is shutting down on") $(hostname) at $(date)" kill $(cat "$PID_DIR/journal.pid" 2>/dev/null) 2>/dev/null kill $(cat "$PID_DIR/direct.pid" 2>/dev/null) 2>/dev/null kill $(cat "$PID_DIR/service.pid" 2>/dev/null) 2>/dev/null rm -f "$PID_DIR"/*.pid } # Function to install the service as a systemd service install_systemd_service() { mkdir -p "$PID_DIR" cat > "$WRAPPER_PATH" < /etc/systemd/system/proxmox-telegram.service <&1 1>&2 2>&3) if [[ $? -ne 0 ]]; then exit 0 fi case "$OPTION" in 1) configure_telegram ;; 2) configure_notifications ;; 3) start_notification_service ;; 4) stop_notification_service ;; 5) check_service_status ;; 6) remove_systemd_service ;; 7) exit 0 ;; esac done } case "$1" in start_silent) start_silent ;; stop_silent) stop_silent ;; *) main_menu ;; esac