diff --git a/AppImage/components/process-detail-modal.tsx b/AppImage/components/process-detail-modal.tsx new file mode 100644 index 00000000..ac86f2a7 --- /dev/null +++ b/AppImage/components/process-detail-modal.tsx @@ -0,0 +1,230 @@ +"use client" + +import { useEffect, useState } from "react" +import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogDescription } from "./ui/dialog" +import { Input } from "./ui/input" +import { ScrollArea } from "./ui/scroll-area" +import { Cpu, MemoryStick, Search } from "lucide-react" +import { fetchApi } from "@/lib/api-config" +import { ProcessInfoModal } from "./process-info-modal" + +interface ProcessInfo { + pid: number + user: string + cpu: number + mem: number + rss_kb: number + command: string +} + +interface ProcessesResponse { + processes: ProcessInfo[] + sort: "cpu" | "mem" + captured_at: number +} + +interface ProcessDetailModalProps { + open: boolean + onOpenChange: (open: boolean) => void + /** Which metric the parent card represents (drives default sort + emphasis) */ + sort: "cpu" | "mem" +} + +const REFRESH_MS = 5000 +const LIMIT = 25 + +const formatRss = (kb: number): string => { + if (kb >= 1024 * 1024) return `${(kb / 1024 / 1024).toFixed(2)} GB` + if (kb >= 1024) return `${(kb / 1024).toFixed(1)} MB` + return `${kb} KB` +} + +export function ProcessDetailModal({ open, onOpenChange, sort }: ProcessDetailModalProps) { + const [data, setData] = useState(null) + const [error, setError] = useState(null) + const [loading, setLoading] = useState(false) + const [filter, setFilter] = useState("") + const [selectedPid, setSelectedPid] = useState(null) + + const fetchProcesses = async (silent = false) => { + if (!silent) setLoading(true) + setError(null) + try { + const res = await fetchApi(`/api/processes?sort=${sort}&limit=${LIMIT}`) + setData(res) + } catch (e: any) { + setError(e?.message || "Failed to fetch processes") + } finally { + if (!silent) setLoading(false) + } + } + + useEffect(() => { + if (!open) 
return + fetchProcesses() + const id = setInterval(() => fetchProcesses(true), REFRESH_MS) + return () => clearInterval(id) + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [open, sort]) + + // Reset filter when dialog closes + useEffect(() => { + if (!open) setFilter("") + }, [open]) + + const filtered = (data?.processes ?? []).filter((p) => { + if (!filter) return true + const q = filter.toLowerCase() + return ( + p.command.toLowerCase().includes(q) || + p.user.toLowerCase().includes(q) || + String(p.pid).includes(q) + ) + }) + + const Icon = sort === "cpu" ? Cpu : MemoryStick + const title = sort === "cpu" ? "Top processes by CPU" : "Top processes by Memory" + const description = + sort === "cpu" + ? "Snapshot from `ps` sorted by CPU usage. Auto-refreshes every 5 s while this dialog is open." + : "Snapshot from `ps` sorted by resident memory. Auto-refreshes every 5 s while this dialog is open." + + // Accent palette matched to the Overview cards: CPU Usage donut uses + // blue (#3b82f6), Memory cached uses rgba(99,102,241,0.55) — we keep + // the same hues so the modal feels like a continuation of the card. + const accent = sort === "cpu" + ? { dot: "#3b82f6", bar: "#3b82f6", text: "text-blue-500" } + : { dot: "#6366f1", bar: "#6366f1", text: "text-indigo-400" } + + // Scale bars to the largest value in the (filtered) list so the visual + // ranking is preserved even when no process is near 100 %. CPU can + // exceed 100 % on multi-threaded apps — falling back to max=1 prevents + // a divide-by-zero when the list is empty. + const maxPrimary = Math.max( + 1, + ...filtered.map((p) => (sort === "cpu" ? p.cpu : p.mem)) + ) + + // Mobile drops PID + USER; desktop keeps the full 5-column layout. + // CPU and MEM columns are wider on desktop with a real gap between + // them so the two metrics don't feel glued together. 
+ const gridCols = + "grid-cols-[minmax(0,1fr)_70px_90px] sm:grid-cols-[60px_96px_minmax(140px,1fr)_110px_120px]" + + return ( + <> + + + + + + {title} + + {description} + + +
+ + setFilter(e.target.value)} + className="pl-8 h-8 text-sm" + /> +
+ + {error ? ( +
{error}
+ ) : ( + +
+ {/* Sticky solid header so scrolled rows don't bleed through */} +
+
PID
+
User
+
Command
+
CPU %
+
{sort === "mem" ? "Memory" : "Mem %"}
+
+ + {filtered.length === 0 && !loading ? ( +
+ No processes match the filter +
+ ) : ( + filtered.map((p) => { + const primary = sort === "cpu" ? p.cpu : p.mem + const barPct = Math.min(100, (primary / maxPrimary) * 100) + return ( + + ) + }) + )} +
+
+ )} + + {data?.captured_at && ( +
+ Captured {new Date(data.captured_at * 1000).toLocaleTimeString()} · {filtered.length} of {data.processes.length} shown +
+ )} +
+
+ + setSelectedPid(null)} + /> + + ) +} diff --git a/AppImage/components/process-info-modal.tsx b/AppImage/components/process-info-modal.tsx new file mode 100644 index 00000000..2345cc42 --- /dev/null +++ b/AppImage/components/process-info-modal.tsx @@ -0,0 +1,224 @@ +"use client" + +import { useEffect, useState } from "react" +import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogDescription } from "./ui/dialog" +import { ScrollArea } from "./ui/scroll-area" +import { Activity, FileText, HardDrive, Clock } from "lucide-react" +import { fetchApi } from "@/lib/api-config" + +interface ProcessDetail { + pid: number + comm: string + cmdline: string + exe: string | null + cwd: string | null + state: string + ppid: number + parent_name: string | null + threads: number + vm_rss_kb: number + vm_size_kb: number + vm_swap_kb: number + user: string + group: string + uid: number + gid: number + start_time: string | null + elapsed: string | null + cpu: number + mem: number + io_read_bytes: number | null + io_write_bytes: number | null + fd_count: number | null + captured_at: number +} + +interface ProcessInfoModalProps { + pid: number | null + accent: { dot: string; bar: string; text: string } + onClose: () => void +} + +const REFRESH_MS = 3000 + +const formatKb = (kb: number | null | undefined): string => { + if (kb == null) return "—" + if (kb >= 1024 * 1024) return `${(kb / 1024 / 1024).toFixed(2)} GB` + if (kb >= 1024) return `${(kb / 1024).toFixed(1)} MB` + return `${kb} KB` +} + +const formatBytes = (b: number | null | undefined): string => { + if (b == null) return "—" + if (b >= 1024 * 1024 * 1024) return `${(b / 1024 / 1024 / 1024).toFixed(2)} GB` + if (b >= 1024 * 1024) return `${(b / 1024 / 1024).toFixed(1)} MB` + if (b >= 1024) return `${(b / 1024).toFixed(1)} KB` + return `${b} B` +} + +// Linux process states from /proc//status. The first char of `State:` +// is the canonical letter — the rest of the field is a human label like +// "(running)". 
We expand the bare letter to something readable; the exact-case lookup runs first because "T" (stopped) and "t" (tracing stop) are distinct states. +const stateLabel = (state: string): string => { + const letter = (state || "").trim().charAt(0) + const map: Record = { + R: "Running", + S: "Sleeping", + D: "Disk wait", + Z: "Zombie", + T: "Stopped", + t: "Tracing stop", + X: "Dead", + I: "Idle", + } + return map[letter] || map[letter.toUpperCase()] || state || "—" +} + +export function ProcessInfoModal({ pid, accent, onClose }: ProcessInfoModalProps) { + const [data, setData] = useState(null) + const [error, setError] = useState(null) + const [loading, setLoading] = useState(false) + const open = pid != null + + const fetchDetail = async (silent = false) => { + if (pid == null) return + if (!silent) setLoading(true) + setError(null) + try { + const res = await fetchApi(`/api/processes/${pid}`) + setData(res) + } catch (e: any) { + // 404 means the process exited while the modal was open — drop the + // stale snapshot and show "Process exited" without auto-closing. Other + // errors keep the last snapshot on screen (error shown in the footer). + setError(e?.message?.includes("404") ? "Process exited" : (e?.message || "Failed to fetch process")) + if (e?.message?.includes("404")) setData(null) + } finally { + if (!silent) setLoading(false) + } + } + + useEffect(() => { + if (pid == null) { + setData(null) + setError(null) + return + } + fetchDetail() + const id = setInterval(() => fetchDetail(true), REFRESH_MS) + return () => clearInterval(id) + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [pid]) + + return ( + { if (!v) onClose() }}> + + + + + {data?.comm || "Process"} + PID {pid} + + + Live snapshot from /proc/{pid}. Auto-refreshes every {REFRESH_MS / 1000} s while open. + + + + {error && !data ? ( +
{error}
+ ) : !data ? ( +
+ {loading ? "Loading…" : "—"} +
+ ) : ( + +
+ {/* Overview */} +
} title="Overview"> + + + + + + +
+ + {/* Resources */} +
} title="Resources"> + + + + + + + +
+ + {/* Command */} +
} title="Command"> + + + + +
+ + {/* Times */} +
} title="Lifetime"> + + +
+
+
+ )} + + {data?.captured_at && ( +
+ Captured {new Date(data.captured_at * 1000).toLocaleTimeString()} + {error ? ` · ${error}` : ""} +
+ )} +
+
+ ) +} + +function Section({ icon, title, children }: { icon: React.ReactNode; title: string; children: React.ReactNode }) { + return ( +
+
+ {icon} + {title} +
+
{children}
+
+ ) +} + +function Row({ + label, + value, + mono, + wrap, + valueClass, +}: { + label: string + value: string + mono?: boolean + wrap?: boolean + valueClass?: string +}) { + return ( +
+
{label}
+
+ {value} +
+
+ ) +} diff --git a/AppImage/components/system-overview.tsx b/AppImage/components/system-overview.tsx index c5cc1e94..81da23ea 100644 --- a/AppImage/components/system-overview.tsx +++ b/AppImage/components/system-overview.tsx @@ -4,10 +4,11 @@ import { useState, useEffect } from "react" import { Card, CardContent, CardHeader, CardTitle } from "./ui/card" import { Progress } from "./ui/progress" import { Badge } from "./ui/badge" -import { Cpu, MemoryStick, Thermometer, Server, Zap, AlertCircle, HardDrive, Network } from "lucide-react" +import { Cpu, MemoryStick, Thermometer, Server, Zap, AlertCircle, HardDrive, Network, ChevronRight } from "lucide-react" import { NodeMetricsCharts } from "./node-metrics-charts" import { NetworkTrafficChart } from "./network-traffic-chart" import { TemperatureDetailModal } from "./temperature-detail-modal" +import { ProcessDetailModal } from "./process-detail-modal" import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select" import { fetchApi } from "../lib/api-config" import { formatNetworkTraffic, getNetworkUnit } from "../lib/format-network" @@ -187,6 +188,8 @@ export function SystemOverview() { const [networkTotals, setNetworkTotals] = useState<{ received: number; sent: number }>({ received: 0, sent: 0 }) const [networkUnit, setNetworkUnit] = useState<"Bytes" | "Bits">("Bytes") // Added networkUnit state const [tempModalOpen, setTempModalOpen] = useState(false) + const [cpuProcModalOpen, setCpuProcModalOpen] = useState(false) + const [memProcModalOpen, setMemProcModalOpen] = useState(false) useEffect(() => { const fetchAllData = async () => { @@ -400,10 +403,17 @@ export function SystemOverview() {
{/* ── CPU Usage (preview restyle v2: tamaño igual a System Info, bars más anchas) ── */} - + setCpuProcModalOpen(true)} + title="View top processes by CPU" + > CPU Usage - +
+ + +
@@ -443,10 +453,17 @@ export function SystemOverview() { {/* ── Memory (preview restyle v2: tamaño igual a System Info, bars más anchas) ── */} - + setMemProcModalOpen(true)} + title="View top processes by memory" + > Memory - +
+ + +
@@ -566,12 +583,24 @@ export function SystemOverview() {
- + + + +
diff --git a/AppImage/components/virtual-machines.tsx b/AppImage/components/virtual-machines.tsx index e5c2b070..6898dc68 100644 --- a/AppImage/components/virtual-machines.tsx +++ b/AppImage/components/virtual-machines.tsx @@ -1129,7 +1129,7 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => { .reduce((sum, vm) => sum + (vm.maxmem || 0), 0) / 1024 ** 3).toFixed(1) }, [safeVMData]) - const { data: systemData } = useSWR<{ memory_total: number; memory_used: number; memory_usage: number }>( + const { data: systemData } = useSWR<{ memory_total: number; memory_used: number; memory_usage: number; cpu_cores?: number; cpu_threads?: number }>( "/api/system", fetcher, { @@ -1346,6 +1346,7 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => { const inUseVCPU = safeVMData .filter((vm) => vm.status === "running") .reduce((sum, vm) => sum + (vm.maxcpu || 0), 0) + const hostThreads = systemData?.cpu_threads ?? systemData?.cpu_cores ?? 0 const stroke = allocPct >= 90 ? '#ef4444' : allocPct >= 75 ? '#f59e0b' : '#3b82f6' return ( @@ -1374,11 +1375,11 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
Configured - {configuredVCPU || '—'} vCPU + {configuredVCPU || '—'}{hostThreads ? ` / ${hostThreads}` : ''} vCPU
In use - {inUseVCPU || '—'} vCPU + {inUseVCPU || '—'}{hostThreads ? ` / ${hostThreads}` : ''} vCPU
diff --git a/AppImage/scripts/flask_server.py b/AppImage/scripts/flask_server.py index 60bd4667..4f1e8c1a 100644 --- a/AppImage/scripts/flask_server.py +++ b/AppImage/scripts/flask_server.py @@ -7715,6 +7715,221 @@ def api_system(): pass return jsonify({'error': str(e)}), 500 +@app.route('/api/processes', methods=['GET']) +@require_auth +def api_processes(): + """Top processes by CPU or memory using `ps` (pre-installed everywhere, + no daemon, no extra dependency). Called from the CPU Usage and Memory + "more info" modals on the Overview page — fetched only when the modal + opens (and on its in-modal refresh tick), so no continuous load on + the host even on a 5 s refresh schedule. + + Query: sort=cpu|mem, limit=1..100 (default 20). + """ + try: + sort = request.args.get('sort', 'cpu') + if sort not in ('cpu', 'mem'): + sort = 'cpu' + try: + limit = max(1, min(int(request.args.get('limit', 20)), 100)) + except (TypeError, ValueError): + limit = 20 + + sort_field = '-pcpu' if sort == 'cpu' else '-pmem' + result = subprocess.run( + ['ps', '-eo', 'pid,user,pcpu,pmem,rss,comm', + '--sort', sort_field, '--no-headers'], + capture_output=True, text=True, timeout=5 + ) + if result.returncode != 0: + return jsonify({'error': result.stderr or 'ps failed'}), 500 + + processes = [] + for line in result.stdout.splitlines()[:limit]: + # Split into at most 6 fields so the command can contain spaces. 
+ parts = line.strip().split(None, 5) + if len(parts) < 6: + continue + try: + processes.append({ + 'pid': int(parts[0]), + 'user': parts[1], + 'cpu': float(parts[2]), + 'mem': float(parts[3]), + 'rss_kb': int(parts[4]), + 'command': parts[5], + }) + except (ValueError, TypeError): + continue + + return jsonify({ + 'processes': processes, + 'sort': sort, + 'captured_at': int(time.time()), + }) + except subprocess.TimeoutExpired: + return jsonify({'error': 'ps timed out'}), 504 + except Exception as e: + return jsonify({'error': str(e)}), 500 + + +@app.route('/api/processes/', methods=['GET']) +@require_auth +def api_process_detail(pid): + """Detailed info for a single process read from /proc//. + + Called from the Top-process row-click → detail modal. Lazy: only + hit when the user explicitly clicks a row, then refreshed inside + the modal every few seconds while it's open. + """ + import pwd, grp + try: + proc_path = f'/proc/{pid}' + if not os.path.isdir(proc_path): + return jsonify({'error': 'Process not found', 'pid': pid}), 404 + + info = {'pid': pid, 'start_time': None, 'elapsed': None, 'cpu': 0.0, 'mem': 0.0}  # defaults so these keys exist even when the ps call fails or the process exits mid-request + + # Short name + full argv + try: + with open(f'{proc_path}/comm') as f: + info['comm'] = f.read().strip() + except Exception: + info['comm'] = None + try: + with open(f'{proc_path}/cmdline', 'rb') as f: + raw = f.read() + # argv null-byte separated; drop trailing null + info['cmdline'] = raw.replace(b'\0', b' ').strip().decode('utf-8', 'replace') + except Exception: + info['cmdline'] = None + + # Symlinks (may EACCES for kernel threads / other namespaces) + try: + info['exe'] = os.readlink(f'{proc_path}/exe') + except (OSError, PermissionError): + info['exe'] = None + try: + info['cwd'] = os.readlink(f'{proc_path}/cwd') + except (OSError, PermissionError): + info['cwd'] = None + + # /proc//status (Vm* sizes, State, PPid, Threads, Uid, Gid) + status = {} + try: + with open(f'{proc_path}/status') as f: + for line in f: + if ':' in line: + k, v = line.split(':', 1) + status[k.strip()] = v.strip() + except
Exception: + pass + + info['state'] = status.get('State', '') + try: + info['ppid'] = int(status.get('PPid', '0')) + except (ValueError, TypeError): + info['ppid'] = 0 + try: + info['threads'] = int(status.get('Threads', '0')) + except (ValueError, TypeError): + info['threads'] = 0 + + def _kb(field): + try: + return int(status.get(field, '0').split()[0]) + except (ValueError, IndexError, AttributeError): + return 0 + info['vm_rss_kb'] = _kb('VmRSS') + info['vm_size_kb'] = _kb('VmSize') + info['vm_swap_kb'] = _kb('VmSwap') + + # Uid → user name; Gid → group name + try: + uid = int(status.get('Uid', '0').split()[0]) + info['uid'] = uid + try: + info['user'] = pwd.getpwuid(uid).pw_name + except KeyError: + info['user'] = str(uid) + except Exception: + info['uid'] = None + info['user'] = None + try: + gid = int(status.get('Gid', '0').split()[0]) + info['gid'] = gid + try: + info['group'] = grp.getgrgid(gid).gr_name + except KeyError: + info['group'] = str(gid) + except Exception: + info['gid'] = None + info['group'] = None + + # Parent process short name (for the "started by" line) + info['parent_name'] = None + if info['ppid']: + try: + with open(f'/proc/{info["ppid"]}/comm') as f: + info['parent_name'] = f.read().strip() + except Exception: + pass + + # Live ps fields the kernel doesn't expose in /proc directly + try: + ps_out = subprocess.run( + ['ps', '-o', 'lstart=,etime=,pcpu=,pmem=', '-p', str(pid)], + capture_output=True, text=True, timeout=2 + ) + if ps_out.returncode == 0: + line = ps_out.stdout.strip() + if line: + # lstart is 5 whitespace-separated tokens (`Wed Jun 4 17:12:23 2026`), + # so we split off the trailing 3 fixed fields from the right. 
+ parts = line.rsplit(None, 3) + if len(parts) == 4: + info['start_time'] = parts[0] + info['elapsed'] = parts[1] + try: + info['cpu'] = float(parts[2]) + except ValueError: + info['cpu'] = 0.0 + try: + info['mem'] = float(parts[3]) + except ValueError: + info['mem'] = 0.0 + except Exception: + pass + + # I/O accounting (kernel requires CONFIG_TASK_IO_ACCOUNTING; also EACCES for non-self) + try: + io = {} + with open(f'{proc_path}/io') as f: + for line in f: + if ':' in line: + k, v = line.split(':', 1) + try: + io[k.strip()] = int(v.strip()) + except ValueError: + pass + info['io_read_bytes'] = io.get('read_bytes') + info['io_write_bytes'] = io.get('write_bytes') + except (OSError, PermissionError): + info['io_read_bytes'] = None + info['io_write_bytes'] = None + + # Open FDs + try: + info['fd_count'] = len(os.listdir(f'{proc_path}/fd')) + except (OSError, PermissionError): + info['fd_count'] = None + + info['captured_at'] = int(time.time()) + return jsonify(info) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + @app.route('/api/temperature/history', methods=['GET']) @require_auth def api_temperature_history(): diff --git a/scripts/gpu_tpu/install_coral_lxc.sh b/scripts/gpu_tpu/install_coral_lxc.sh index 9920b751..e72efbe3 100644 --- a/scripts/gpu_tpu/install_coral_lxc.sh +++ b/scripts/gpu_tpu/install_coral_lxc.sh @@ -402,6 +402,30 @@ configure_lxc_hardware() { # INSTALL CORAL TPU DRIVER INSIDE CONTAINER # ========================================================== +# Detect the package family inside the container. The PVE passthrough +# config (above) works on any distro, but Google only ships an official +# libedgetpu APT repo for Debian/Ubuntu. For other distros we configure +# the device and skip the runtime install with a clear message. 
+detect_container_distro() { + local id_like id + id=$(pct exec "$CONTAINER_ID" -- sh -c 'awk -F= "/^ID=/{gsub(/\"/, \"\", \$2); print \$2}" /etc/os-release 2>/dev/null' 2>/dev/null | tr -d '\r' | tr '[:upper:]' '[:lower:]') + id_like=$(pct exec "$CONTAINER_ID" -- sh -c 'awk -F= "/^ID_LIKE=/{gsub(/\"/, \"\", \$2); print \$2}" /etc/os-release 2>/dev/null' 2>/dev/null | tr -d '\r' | tr '[:upper:]' '[:lower:]') + case "$id" in + debian|ubuntu|raspbian|linuxmint|pop|kali|devuan) echo "debian"; return ;; + alpine) echo "alpine"; return ;; + arch|manjaro|endeavouros|garuda|cachyos) echo "arch"; return ;; + rhel|centos|rocky|almalinux|fedora|amzn|ol) echo "rhel"; return ;; + opensuse*|sles|suse) echo "suse"; return ;; + esac + case "$id_like" in + *debian*|*ubuntu*) echo "debian"; return ;; + *arch*) echo "arch"; return ;; + *rhel*|*fedora*|*centos*) echo "rhel"; return ;; + *suse*) echo "suse"; return ;; + esac + echo "unknown" +} + install_coral_in_container() { msg_info "$(translate 'Installing Coral TPU driver inside the container...')" tput sc @@ -420,12 +444,48 @@ install_coral_in_container() { stop_spinner - # Pre-flight: refuse to run on non-Debian-family containers. The - # apt-get block below would crash with cryptic errors and leave the - # container half-configured. - if ! pct exec "$CONTAINER_ID" -- bash -c 'command -v apt-get' &>/dev/null; then - msg_error "$(translate 'Container does not have apt-get available. Coral driver installation only supports Debian/Ubuntu containers.')" - return 1 + # Detect the container distro. Passthrough config (already written to + # /etc/pve/lxc/.conf above) works on any distro — only the libedgetpu + # runtime install path is distro-specific, and Google's official APT repo + # only covers Debian/Ubuntu. For other distros offer an opt-in + # passthrough-only mode (skip the apt-get install, leave the device + # visible inside the CT so app-level runtimes can use it, e.g. the + # Frigate Docker image bundles libedgetpu). 
If the user declines, + # behave exactly like the pre-detection version: error out and abort. + local CT_FAMILY + CT_FAMILY=$(detect_container_distro) + + if [[ "$CT_FAMILY" != "debian" ]]; then + rm -f "$LOG_FILE" + local distro_label + case "$CT_FAMILY" in + alpine) distro_label="Alpine" ;; + arch) distro_label="Arch / Manjaro" ;; + rhel) distro_label="RHEL / Rocky / AlmaLinux / Fedora" ;; + suse) distro_label="openSUSE / SLES" ;; + *) distro_label="$(translate 'this distribution')" ;; + esac + + # whiptail (not dialog) — prompt sits in the middle of the install + # flow. Default is "No" so a user who just presses Enter / Esc lands + # on the same abort path as the legacy behaviour. + if ! whiptail --title "$(translate 'Non-Debian container detected')" --defaultno \ + --yesno "$(translate 'Detected:') $distro_label + +$(translate 'Google only ships an official libedgetpu APT repository for Debian/Ubuntu. Hardware passthrough is already written to') /etc/pve/lxc/${CONTAINER_ID}.conf $(translate '— that part works on any distro and is harmless.') + +$(translate 'Would you like to continue in passthrough-only mode? The libedgetpu APT install will be skipped, the Coral device will still be visible inside the container (e.g. /dev/apex_0), and you can install the runtime yourself or use an app container that bundles it (e.g. the Frigate Docker image).') + +$(translate 'Choose No to abort and roll back to the legacy refuse behaviour.')" 22 78 \ + 3>&1 1>&2 2>&3; then + msg_error "$(translate 'Container does not have apt-get available. 
Coral driver installation only supports Debian/Ubuntu containers.')" + return 1 + fi + + msg_warn "$(translate 'Container distro') ($distro_label) $(translate 'is not supported by the official Google libedgetpu APT repository.')" + msg_ok "$(translate 'Hardware passthrough is already configured — the Coral device is visible inside the container as /dev/apex_0 (M.2) and/or /dev/bus/usb (USB).')" + msg_info2 "$(translate 'To use Coral from a regular app, install the libedgetpu runtime via the usual method for your distro (community package or build from source). The simplest path is to run an app container that bundles the runtime — e.g. the Frigate Docker image — passing the device through with') --device /dev/apex_0:/dev/apex_0" + return 0 fi # Determine driver package for Coral M.2 (USB always uses -std). diff --git a/web/app/[locale]/docs/monitor/dashboard/system-overview/page.tsx b/web/app/[locale]/docs/monitor/dashboard/system-overview/page.tsx index 55d3fea1..e4d9819e 100644 --- a/web/app/[locale]/docs/monitor/dashboard/system-overview/page.tsx +++ b/web/app/[locale]/docs/monitor/dashboard/system-overview/page.tsx @@ -31,6 +31,7 @@ export default async function SystemOverviewTabPage({ const messages = (await getMessages({ locale })) as unknown as { docs: { monitor: { dashboard: { systemOverview: { topRow: { rows: TopRow[]; thresholdsItems: string[] } + processes: { listItems: string[]; detailItems: string[] } bottom: { storageItems: string[] } refresh: { items: string[] } dataCollected: { rows: DataRow[] } @@ -40,6 +41,8 @@ export default async function SystemOverviewTabPage({ const so = messages.docs.monitor.dashboard.systemOverview const topRows = so.topRow.rows const thresholdsItems = so.topRow.thresholdsItems + const processListItems = so.processes.listItems + const processDetailItems = so.processes.detailItems const storageItems = so.bottom.storageItems const refreshItems = so.refresh.items const dataRows = so.dataCollected.rows @@ -135,6 +138,58 @@ 
export default async function SystemOverviewTabPage({ {t("topRow.sparklineBody")} +

{t("processes.heading")}

+

+ {t.rich("processes.intro", { code })} +

+ +

{t("processes.listTitle")}

+
    + {processListItems.map((_, idx) => ( +
  • {t.rich(`processes.listItems.${idx}`, { strong })}
  • + ))} +
+ +
+ {t("processes.captureListAlt")} +
+ {t("processes.captureListCaption")} +
+
+ +

{t("processes.detailTitle")}

+

+ {t.rich("processes.detailIntro", { code })} +

+
    + {processDetailItems.map((_, idx) => ( +
  • {t.rich(`processes.detailItems.${idx}`, { strong, code })}
  • + ))} +
+

+ {t.rich("processes.detailRefresh", { em, code })} +

+ +
+ {t("processes.captureDetailAlt")} +
+ {t("processes.captureDetailCaption")} +
+
+ +

{t("processes.sourceTitle")}

+

+ {t.rich("processes.sourceBody", { code, em })} +

+

{t("middle.heading")}

{t.rich("middle.body1", { code, em })} diff --git a/web/messages/en/docs/hardware/coral-tpu-lxc.json b/web/messages/en/docs/hardware/coral-tpu-lxc.json index f834752c..cb97aeb7 100644 --- a/web/messages/en/docs/hardware/coral-tpu-lxc.json +++ b/web/messages/en/docs/hardware/coral-tpu-lxc.json @@ -20,7 +20,7 @@ "title": "Before you start", "drivers": "Coral drivers already installed on the host. This script does not install them; it only configures passthrough to the container. Run Install Coral TPU on the Host first if you haven't.", "driversCheck": "ls /dev/apex_* 2>/dev/null ; lsusb | grep -E '1a6e:089a|18d1:9302'", - "container": "An existing LXC container, ideally running a Debian / Ubuntu-based distro. The inside-container install uses apt-get; Alpine / Arch containers are not currently supported by this script.", + "container": "An existing LXC container, ideally running a Debian / Ubuntu-based distro — the in-container runtime install uses apt-get. Non-Debian containers (Alpine, Arch, RHEL, SUSE…) are still supported in passthrough-only mode: the script detects the distro and offers a prompt to skip the libedgetpu APT install while still writing the device passthrough config — useful for app containers that bundle the runtime themselves (e.g. the Frigate Docker image).", "downtime": "Be OK with a brief downtime of the container. The script stops it to apply config changes, then starts it back up to install drivers inside. No host reboot needed." }, "hostPrep": { @@ -70,8 +70,8 @@ ], "noIgpuTitle": "Why no iGPU drivers here?", "noIgpuBody": "Earlier versions of this script also installed Intel va-driver-all, intel-opencl-icd and friends so the same container could do Quick Sync video decode alongside Coral inference. That doubled-up responsibility caused confusing failures when the user only wanted Coral. 
The iGPU side is now the exclusive job of Add GPU to LXC — run it first if you also want hardware video decode in the container.", - "debianTitle": "Debian / Ubuntu containers only", - "debianBody": "The in-container install uses apt-get directly. Alpine, Arch or RHEL-based containers are not currently supported — the install step will fail and leave the LXC with the passthrough config but no drivers inside. For those distros, install the Coral runtime manually following Google's official guide after the LXC config step." + "debianTitle": "Non-Debian containers — passthrough-only mode", + "debianBody": "The in-container runtime install uses apt-get, which only ships on Debian/Ubuntu-family distros. On Alpine, Arch, RHEL or SUSE containers the script detects the distro via /etc/os-release and shows a confirmation prompt: continue in passthrough-only mode (writes the device config to /etc/pve/lxc/<ctid>.conf and skips the libedgetpu APT install) or abort. Passthrough-only is the right choice if the app container that will actually use Coral already bundles the runtime — the canonical example is the Frigate Docker image. Otherwise, follow Google's official guide to install libedgetpu manually after the script has written the LXC config." }, "summary": { "title": "Summary", @@ -95,8 +95,8 @@ "apexBody": "Host apex module isn't loaded. On the host: lsmod | grep apex — if empty, run modprobe apex, or reboot if you just installed Coral drivers. Once the host has /dev/apex_0, restart the container: pct stop <ctid> && pct start <ctid>.", "replugTitle": "USB Coral disappears after replug in a different port", "replugBody": "This is exactly why the script mounts /dev/bus/usb instead of the /dev/coral symlink. If you're hitting this, check your LXC config has lxc.mount.entry: /dev/bus/usb dev/bus/usb ... and not a reference to /dev/coral directly. 
Old configs from earlier script versions may need updating — re-run the script on the same container and the config gets refreshed.", - "alpineTitle": "In-container install fails on an Alpine container", - "alpineBody": "The script uses apt-get, which Alpine doesn't have. The LXC passthrough config is still valid — just install the Coral runtime manually with apk add following Google's guide for Alpine, or use a Debian-based container if you don't need the smaller footprint.", + "alpineTitle": "Alpine / Arch / RHEL / SUSE container — runtime not installed", + "alpineBody": "If you chose passthrough-only mode when the script prompted, the LXC config is written and the Coral device is visible inside the container, but the libedgetpu runtime is not installed. That's by design: Google's APT repo only ships for Debian/Ubuntu. Install the runtime manually with your distro's package manager (Alpine: apk add; Arch: AUR; RHEL/SUSE: build from source) following Google's official guide, or run an app container that bundles the runtime — the Frigate Docker image is the canonical example: just expose the device with --device /dev/apex_0:/dev/apex_0 (M.2) or the USB bind mount the script already wrote (USB).", "frigateTitle": "Frigate says 'Coral EdgeTPU detected but not available'", "frigateBody": "Almost always a permissions issue inside the container. Frigate runs as root by default; check the root user is in the plugdev group inside the container (for USB), and that the process can read /dev/apex_0 (for M.2). 
ls -l /dev/apex_0 from inside the container should show group apex — if not, add the GID alignment to /etc/group or switch the container to privileged mode.", "logsTitle": "Check both host and container logs", diff --git a/web/messages/en/docs/monitor/dashboard/system-overview.json b/web/messages/en/docs/monitor/dashboard/system-overview.json index dea9a13e..f179fffc 100644 --- a/web/messages/en/docs/monitor/dashboard/system-overview.json +++ b/web/messages/en/docs/monitor/dashboard/system-overview.json @@ -53,6 +53,32 @@ "sparklineTitle": "The sparkline is meaningful", "sparklineBody": "The temperature card draws a 5-minute trace under the value, with the line and gradient colour following the same Warning/Critical pair documented above. It's the fastest way to see whether the host is in a thermal climb without opening the detail modal." }, + "processes": { + "heading": "Click-through: top processes by CPU / Memory", + "intro": "The CPU Usage and Memory cards are clickable. Clicking either opens a sorted list of the top 25 processes — the CPU card sorts by %CPU, the Memory card sorts by resident memory (RSS). Both pull from /api/processes?sort=cpu|mem&limit=25, which runs a single ps -eo pid,user,pcpu,pmem,rss,comm per refresh.", + "listTitle": "The list modal", + "listItems": [ + "Auto-refresh — the list re-fetches every 5 s while the dialog is open. Closing the dialog stops all polling.", + "Filter box — matches against command, user or PID without re-fetching from the server.", + "Inline progress bar — the primary metric column draws a bar scaled to the largest value in the filtered list, so visual ranking is preserved even when no process is near 100 %.", + "Mobile layout — under 640 px the PID and User columns drop out so Command, CPU % and Memory still fit without horizontal scroll."
+ ], + "captureListAlt": "Top processes by Memory modal — table with PID, USER, COMMAND, CPU %, Memory columns sorted by RSS", + "captureListCaption": "The Memory card opens the list sorted by RSS (indigo accent). The CPU card opens the same list sorted by %CPU (blue accent).", + "detailTitle": "Per-process detail", + "detailIntro": "Clicking any row in the list opens a second modal with the full live picture of that one process, served from /api/processes/<pid>. Four sections:", + "detailItems": [ + "Overview — state (R/S/D/Z/…), parent (PPid + parent comm), thread count, open FD count, user and group.", + "Resources — CPU %, Memory %, Resident (RSS), Virtual size, Swap, I/O read and write bytes.", + "Command — short name (comm), full command line, executable path and working directory.", + "Lifetime — start timestamp and elapsed runtime." + ], + "detailRefresh": "The detail modal refreshes every 3 s while open. If the process exits mid-modal the next refresh surfaces Process exited instead of stale data — expected for short-lived helpers like pct exec or backup subprocesses.", + "captureDetailAlt": "Process detail modal — Overview, Resources, Command and Lifetime sections for a single PID", + "captureDetailCaption": "Per-process detail modal opened from a list row. The accent colour matches the card that opened it (blue for CPU, indigo for Memory).", + "sourceTitle": "Where the data comes from", + "sourceBody": "/api/processes/<pid> reads /proc/<pid>/cmdline, /exe, /cwd, /status, /io and /fd directly, and calls ps -o lstart=,etime=,pcpu=,pmem= -p <pid> for the live fields the kernel doesn't expose in /proc. UID and GID are resolved to user / group names through Python's pwd / grp modules. Both endpoints are pure on-demand HTTP handlers — no daemon, no background sampling. Nothing runs on the server when the modal is closed." 
+ }, "middle": { "heading": "Middle: node metrics charts", "body1": "Below the top row sits the NodeMetricsCharts component — historical CPU, memory and disk-I/O graphs sourced from Proxmox's own RRD store via /api/node/metrics. A timeframe selector switches between 1 hour / 24 hours / 7 days / 30 days / 1 year; data resolution drops as the window grows so the chart stays smooth.", diff --git a/web/messages/es/docs/hardware/coral-tpu-lxc.json b/web/messages/es/docs/hardware/coral-tpu-lxc.json index 4cecdab0..6b169463 100644 --- a/web/messages/es/docs/hardware/coral-tpu-lxc.json +++ b/web/messages/es/docs/hardware/coral-tpu-lxc.json @@ -20,7 +20,7 @@ "title": "Antes de empezar", "drivers": "Drivers de Coral ya instalados en el host. Este script no los instala; solo configura el passthrough al contenedor. Ejecuta Install Coral TPU on the Host primero si no lo has hecho.", "driversCheck": "ls /dev/apex_* 2>/dev/null ; lsusb | grep -E '1a6e:089a|18d1:9302'", - "container": "Un contenedor LXC existente, idealmente con una distro basada en Debian / Ubuntu. La instalación dentro del contenedor usa apt-get; los contenedores Alpine / Arch no están soportados por este script actualmente.", + "container": "Un contenedor LXC existente, idealmente con una distro basada en Debian / Ubuntu — la instalación del runtime dentro del contenedor usa apt-get. Los contenedores no-Debian (Alpine, Arch, RHEL, SUSE…) siguen estando soportados en modo passthrough-only: el script detecta la distro y ofrece un prompt para saltarse la instalación APT de libedgetpu mientras escribe la config de passthrough del dispositivo — útil para contenedores de aplicación que ya incluyen el runtime (p.ej. la imagen Docker de Frigate).", "downtime": "Asume una breve interrupción del contenedor. El script lo para para aplicar los cambios de config y lo arranca de nuevo para instalar los drivers dentro. No hace falta reiniciar el host." 
}, "hostPrep": { @@ -70,8 +70,8 @@ ], "noIgpuTitle": "¿Por qué no hay drivers de iGPU aquí?", "noIgpuBody": "Versiones anteriores de este script también instalaban Intel va-driver-all, intel-opencl-icd y compañía para que el mismo contenedor pudiera hacer decode de vídeo Quick Sync junto a la inferencia de Coral. Esa doble responsabilidad causaba fallos confusos cuando el usuario solo quería Coral. El lado iGPU es ahora trabajo exclusivo de Añadir GPU a LXC — ejecútalo primero si también quieres decode de vídeo por hardware en el contenedor.", - "debianTitle": "Solo contenedores Debian / Ubuntu", - "debianBody": "La instalación dentro del contenedor usa apt-get directamente. Los contenedores Alpine, Arch o basados en RHEL no están soportados actualmente — el paso de instalación fallará y dejará el LXC con la config de passthrough pero sin drivers dentro. Para esas distros, instala el runtime de Coral manualmente siguiendo la guía oficial de Google después del paso de config del LXC." + "debianTitle": "Contenedores no-Debian — modo passthrough-only", + "debianBody": "La instalación del runtime dentro del contenedor usa apt-get, que solo viene con distros de la familia Debian/Ubuntu. En contenedores Alpine, Arch, RHEL o SUSE el script detecta la distro vía /etc/os-release y muestra un prompt de confirmación: continuar en modo passthrough-only (escribe la config del dispositivo en /etc/pve/lxc/<ctid>.conf y se salta la instalación APT de libedgetpu) o cancelar. Passthrough-only es la opción correcta si el contenedor de aplicación que va a usar Coral ya incluye el runtime — el ejemplo canónico es la imagen Docker de Frigate. Si no, sigue la guía oficial de Google para instalar libedgetpu manualmente después de que el script haya escrito la config del LXC." }, "summary": { "title": "Resumen", @@ -95,8 +95,8 @@ "apexBody": "El módulo apex del host no está cargado. 
En el host: lsmod | grep apex — si está vacío, ejecuta modprobe apex, o reinicia si acabas de instalar los drivers de Coral. Una vez el host tenga /dev/apex_0, reinicia el contenedor: pct stop <ctid> && pct start <ctid>.", "replugTitle": "La Coral USB desaparece al reconectarla en otro puerto", "replugBody": "Justo por eso el script monta /dev/bus/usb en lugar del symlink /dev/coral. Si te pasa esto, comprueba que tu config del LXC tiene lxc.mount.entry: /dev/bus/usb dev/bus/usb ... y no una referencia directa a /dev/coral. Las configs viejas de versiones anteriores del script pueden necesitar actualizarse — vuelve a ejecutar el script sobre el mismo contenedor y la config se refresca.", - "alpineTitle": "La instalación dentro del contenedor falla en un contenedor Alpine", - "alpineBody": "El script usa apt-get, que Alpine no tiene. La config de passthrough del LXC sigue siendo válida — solo instala el runtime de Coral manualmente con apk add siguiendo la guía de Google para Alpine, o usa un contenedor basado en Debian si no necesitas la huella más pequeña.", + "alpineTitle": "Contenedor Alpine / Arch / RHEL / SUSE — runtime no instalado", + "alpineBody": "Si elegiste modo passthrough-only cuando el script lo preguntó, la config del LXC se escribió y el dispositivo Coral es visible dentro del contenedor, pero el runtime libedgetpu no está instalado. Es así por diseño: el repo APT de Google solo se publica para Debian/Ubuntu. 
Instala el runtime manualmente con el gestor de paquetes de tu distro (Alpine: apk add; Arch: AUR; RHEL/SUSE: compilar desde fuente) siguiendo la guía oficial de Google, o usa un contenedor de aplicación que incluya el runtime — la imagen Docker de Frigate es el ejemplo canónico: solo expón el dispositivo con --device /dev/apex_0:/dev/apex_0 (M.2) o el bind mount USB que el script ya escribió (USB).", "frigateTitle": "Frigate dice 'Coral EdgeTPU detected but not available'", "frigateBody": "Casi siempre es un problema de permisos dentro del contenedor. Frigate corre como root por defecto; comprueba que el usuario root está en el grupo plugdev dentro del contenedor (para USB), y que el proceso puede leer /dev/apex_0 (para M.2). ls -l /dev/apex_0 desde dentro del contenedor debería mostrar el grupo apex — si no, añade el alineamiento de GID a /etc/group o cambia el contenedor a modo privilegiado.", "logsTitle": "Revisa los logs del host y del contenedor", diff --git a/web/messages/es/docs/monitor/dashboard/system-overview.json b/web/messages/es/docs/monitor/dashboard/system-overview.json index 2f686c13..f69eadde 100644 --- a/web/messages/es/docs/monitor/dashboard/system-overview.json +++ b/web/messages/es/docs/monitor/dashboard/system-overview.json @@ -53,6 +53,32 @@ "sparklineTitle": "El sparkline es significativo", "sparklineBody": "La tarjeta de temperatura dibuja una traza de 5 minutos bajo el valor, con la línea y el degradado siguiendo el mismo par Warning/Critical documentado arriba. Es la forma más rápida de ver si el host está en escalada térmica sin abrir la modal de detalle." }, + "processes": { + "heading": "Acceso directo: top procesos por CPU / Memoria", + "intro": "Las tarjetas CPU Usage y Memory son clicables. Al pulsar cualquiera de ellas se abre una lista ordenable con los 25 procesos top — la tarjeta de CPU ordena por %CPU, la de Memory ordena por memoria residente (RSS). 
Ambas tiran de /api/processes?sort=cpu|mem&limit=25, que ejecuta un único ps -eo pid,user,pcpu,pmem,rss,comm por refresco.", + "listTitle": "La modal con la lista", + "listItems": [ + "Auto-refresco — la lista vuelve a obtener datos cada 5 s mientras el diálogo está abierto. Al cerrar el diálogo se detiene todo el polling.", + "Caja de filtro — busca por command, user o PID sin volver a pedir datos al servidor.", + "Barra de progreso en línea — la columna de la métrica primaria dibuja una barra escalada al mayor valor de la lista filtrada, para que el orden visual se mantenga aunque ningún proceso esté cerca del 100 %.", + "Layout móvil — por debajo de 640 px las columnas PID y User desaparecen para que Command, CPU % y Memory sigan cabiendo sin scroll horizontal." + ], + "captureListAlt": "Modal Top processes by Memory — tabla con columnas PID, USER, COMMAND, CPU %, Memory ordenada por RSS", + "captureListCaption": "La tarjeta Memory abre la lista ordenada por RSS (acento índigo). La tarjeta CPU abre la misma lista ordenada por %CPU (acento azul).", + "detailTitle": "Detalle por proceso", + "detailIntro": "Al pulsar cualquier fila de la lista se abre una segunda modal con la foto en vivo completa de ese proceso, servida desde /api/processes/<pid>. Cuatro secciones:", + "detailItems": [ + "Overview — estado (R/S/D/Z/…), proceso padre (PPid + comm del padre), número de hilos, FDs abiertos, usuario y grupo.", + "Resources — CPU %, Memoria %, Resident (RSS), Virtual size, Swap, bytes de I/O de lectura y escritura.", + "Command — nombre corto (comm), línea de comandos completa, ruta del ejecutable y directorio de trabajo.", + "Lifetime — timestamp de arranque y tiempo transcurrido en ejecución." + ], + "detailRefresh": "La modal de detalle se refresca cada 3 s mientras está abierta. 
Si el proceso termina con la modal abierta, el siguiente refresco muestra Process exited en vez de datos obsoletos — esperable para procesos efímeros como pct exec o subprocesos de backup.", + "captureDetailAlt": "Modal de detalle de proceso — secciones Overview, Resources, Command y Lifetime para un único PID", + "captureDetailCaption": "Modal de detalle por proceso abierta desde una fila de la lista. El color de acento sigue al de la tarjeta que la abrió (azul para CPU, índigo para Memory).", + "sourceTitle": "De dónde salen los datos", + "sourceBody": "/api/processes/<pid> lee directamente /proc/<pid>/cmdline, /exe, /cwd, /status, /io y /fd, y llama a ps -o lstart=,etime=,pcpu=,pmem= -p <pid> para los campos en vivo que el kernel no expone en /proc. UID y GID se resuelven a nombre de usuario / grupo con los módulos pwd / grp de Python. Ambos endpoints son handlers HTTP puros bajo demanda — sin daemon, sin sampling en background. No corre nada en el servidor mientras la modal esté cerrada." + }, "middle": { "heading": "Medio: gráficas de métricas del nodo", "body1": "Bajo la fila superior se encuentra el componente NodeMetricsCharts — gráficas históricas de CPU, memoria y E/S de disco tomadas del propio almacén RRD de Proxmox vía /api/node/metrics. 
Un selector de timeframe alterna entre 1 hora / 24 horas / 7 días / 30 días / 1 año; la resolución de los datos baja a medida que crece la ventana para que la gráfica se mantenga fluida.", diff --git a/web/public/monitor/dashboard-home.png b/web/public/monitor/dashboard-home.png index fbfe6856..d468d4a8 100644 Binary files a/web/public/monitor/dashboard-home.png and b/web/public/monitor/dashboard-home.png differ diff --git a/web/public/monitor/system-overview-process-detail.png b/web/public/monitor/system-overview-process-detail.png new file mode 100644 index 00000000..7c792413 Binary files /dev/null and b/web/public/monitor/system-overview-process-detail.png differ diff --git a/web/public/monitor/system-overview-top-processes.png b/web/public/monitor/system-overview-top-processes.png new file mode 100644 index 00000000..5f43e9ac Binary files /dev/null and b/web/public/monitor/system-overview-top-processes.png differ