diff --git a/AppImage/ProxMenux-1.2.2.1-beta.AppImage b/AppImage/ProxMenux-1.2.2.1-beta.AppImage index cf21e799..cb1930ea 100755 Binary files a/AppImage/ProxMenux-1.2.2.1-beta.AppImage and b/AppImage/ProxMenux-1.2.2.1-beta.AppImage differ diff --git a/AppImage/ProxMenux-Monitor.AppImage.sha256 b/AppImage/ProxMenux-Monitor.AppImage.sha256 index 7a1e2e2c..b332e90d 100644 --- a/AppImage/ProxMenux-Monitor.AppImage.sha256 +++ b/AppImage/ProxMenux-Monitor.AppImage.sha256 @@ -1 +1 @@ -ee588e46f8898925d60d56a79f5364083be4eedccc2274fd0caeb220f795ade6 ProxMenux-1.2.2.1-beta.AppImage +aa53e689c13d7184ebd7cb46cc0f24af9628804fcaa223a833364a5a09e382ed ProxMenux-1.2.2.1-beta.AppImage diff --git a/AppImage/components/host-backup.tsx b/AppImage/components/host-backup.tsx new file mode 100644 index 00000000..14fea693 --- /dev/null +++ b/AppImage/components/host-backup.tsx @@ -0,0 +1,600 @@ +"use client" + +import { useState } from "react" +import useSWR from "swr" +import { Card, CardContent, CardHeader, CardTitle } from "./ui/card" +import { Button } from "./ui/button" +import { Badge } from "./ui/badge" +import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogDescription } from "./ui/dialog" +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select" +import { + DatabaseBackup, + Clock, + HardDrive, + Server, + CheckCircle2, + AlertTriangle, + XCircle, + Loader2, + PlayCircle, + Archive, + FileSearch, + Calendar, +} from "lucide-react" +import { fetchApi } from "../lib/api-config" +import { formatStorage } from "../lib/utils" + +// ── Shape contracts with the backend (flask_server.py: api_host_backups_*) ── + +interface BackupJob { + id: string + destination: string + method: string // "local_tar" | "pbs" | "borg" | "unknown" + on_calendar: string + retention: string + timer_enabled: boolean + last_status: string | null + next_run: string | null +} + +interface BackupArchive { + id: string // basename of the .tar file (also the URL slug) + 
path: string // absolute path on host + size_bytes: number + mtime: number // unix seconds + // From the backend identifier — see _identify_host_backup() in flask_server.py. + // kind: "manual" / "scheduled" when we know; "legacy" when only the in-tar + // marker confirmed it's a ProxMenux backup (no sidecar, no name match). + job_id: string | null + kind: "manual" | "scheduled" | "legacy" + profile: string | null + source_hostname: string | null + // Which detection path identified this archive. Surfaced as a small tooltip + // hint so the operator knows whether the metadata is authoritative + // (sidecar) or inferred (filename / tar-peek). + detected_via: "sidecar" | "job_id_match" | "hostcfg_prefix" | "tar_peek" +} + +interface ManifestSourceHost { + hostname: string + pve_version: string | null + roles: string[] + kernel: string + boot_mode: string + cpu_model: string + memory_kb: number +} + +interface PreflightCheck { + id: string + severity: "pass" | "warn" | "fail" + message: string + details: Record | null +} + +interface PreflightReport { + source_host_at_backup: ManifestSourceHost + selected_mode: { + mode: string + paths_include: string[] + paths_exclude: string[] + components_include: string[] + storage_apply: boolean + network_apply: boolean + } + preflight: { + checks: PreflightCheck[] + summary: { pass: number; warn: number; fail: number } + } + storage: { + zfs: Array<{ name: string; action: string; present: string[]; missing: string[] }> + lvm: Array<{ name: string; action: string }> + pve_storage: Array<{ id: string; type: string; action: string; note: string | null }> + in_selected_mode: boolean + } + network: { + keep: Array<{ ifname: string; mac: string }> + remap: Array<{ source_ifname: string; destination_ifname: string; mac: string }> + orphan: Array<{ source_ifname: string; source_mac: string }> + new: Array<{ ifname: string; mac: string }> + in_selected_mode: boolean + } + driver_reinstall: { + plan: Array<{ + component_id: string + type: 
string + version: string + installer: string | null + action: string + reason: string + }> + } + abort_reason: string | null +} + +const fetcher = async (url: string) => fetchApi(url) + +const formatMtime = (mtime: number) => + new Date(mtime * 1000).toLocaleString(undefined, { + year: "numeric", + month: "short", + day: "numeric", + hour: "2-digit", + minute: "2-digit", + }) + +const formatNext = (iso: string | null) => { + if (!iso) return "—" + try { + return new Date(iso).toLocaleString() + } catch { + return iso + } +} + +export function HostBackup() { + const { data: jobsResp, error: jobsErr } = useSWR<{ jobs: BackupJob[] }>( + "/api/host-backups/jobs", + fetcher, + { refreshInterval: 30000 }, + ) + const { data: archivesResp, error: archivesErr } = useSWR<{ archives: BackupArchive[] }>( + "/api/host-backups/archives", + fetcher, + { refreshInterval: 30000 }, + ) + + const [inspectingArchive, setInspectingArchive] = useState(null) + + return ( +
+ {/* ── Scheduled jobs ───────────────────────────────── */} + + +
+ + Scheduled Backup Jobs +
+ {jobsResp?.jobs?.length ?? 0} +
+ + {jobsErr ? ( +
Failed to load jobs
+ ) : !jobsResp ? ( +
+ + Loading... +
+ ) : jobsResp.jobs.length === 0 ? ( +
+

No scheduled backup jobs configured yet.

+

+ For a one-shot manual backup{" "} + or to create a scheduled job, run: +

+ + bash /usr/local/share/proxmenux/scripts/backup_restore/backup_host.sh + +

+ Menu options 1-6 are manual backups (default or custom paths, to PBS, Borg, or local tar). Option 7 opens the scheduler if you want a recurring job. +

+
+ ) : ( +
+ + + + + + + + + + + + + {jobsResp.jobs.map((j) => ( + + + + + + + + + ))} + +
IDDestinationMethodScheduleLast statusNext run
{j.id} + {j.destination || "—"} + {j.method}{j.on_calendar} + {j.last_status ? ( + {j.last_status} + ) : ( + never + )} + + {formatNext(j.next_run)} + {!j.timer_enabled && ( + + timer disabled + + )} +
+
+ )} +
+
+ + {/* ── Available archives ─────────────────────────────── */} + + +
+ + Available Archives +
+ {archivesResp?.archives?.length ?? 0} +
+ + {archivesErr ? ( +
Failed to load archives
+ ) : !archivesResp ? ( +
+ + Loading... +
+ ) : archivesResp.archives.length === 0 ? ( +
+ No backup archives found on this host. We scan /var/lib/vz/dump and any custom destination from a scheduled job, looking for files named hostcfg-<hostname>-*.tar.zst (manual backups) or <job_id>-*.tar.* (scheduled). PBS and Borg backups aren't surfaced in the UI yet. +
+ ) : ( +
+ {archivesResp.archives.map((a) => ( +
+
+
+ {a.id} +
+
+ + + {formatMtime(a.mtime)} + + + + {formatStorage(a.size_bytes)} + + {a.kind === "scheduled" && a.job_id ? ( + job: {a.job_id} + ) : a.kind === "legacy" ? ( + + legacy + + ) : ( + + manual + + )} + {a.source_hostname && a.source_hostname !== "" && ( + host: {a.source_hostname} + )} +
+
+ +
+ ))} +
+ )} +
+
+ + {/* ── Inspect / preflight modal ──────────────────────── */} + setInspectingArchive(null)} + /> +
+ ) +} + +// ────────────────────────────────────────────────────────────── +// Inspect modal — shows manifest summary + lets the operator pick +// a restore mode and run the dry-run preflight + plan against this +// host. No mutating actions; --apply stays on the CLI for 1.3.0. +// ────────────────────────────────────────────────────────────── +function InspectModal({ + archive, + onClose, +}: { + archive: BackupArchive | null + onClose: () => void +}) { + const open = archive !== null + const [mode, setMode] = useState("full") + const [report, setReport] = useState(null) + const [running, setRunning] = useState(false) + const [error, setError] = useState(null) + + const { data: manifest, error: manifestErr } = useSWR<{ + source_host: ManifestSourceHost + proxmenux_installed_components: Array<{ id: string; version_at_backup: string | null }> + vms_lxcs_at_backup: { vms: unknown[]; lxcs: unknown[] } + storage_inventory?: { zfs_pools?: unknown[]; lvm?: { vgs?: unknown[] } } + }>( + archive ? `/api/host-backups/archives/${encodeURIComponent(archive.id)}/manifest` : null, + fetcher, + ) + + const runPreflight = async () => { + if (!archive) return + setRunning(true) + setError(null) + setReport(null) + try { + const res = await fetchApi( + `/api/host-backups/archives/${encodeURIComponent(archive.id)}/preflight`, + { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ mode }), + }, + ) + setReport(res) + } catch (e: any) { + setError(e?.message || "Preflight failed") + } finally { + setRunning(false) + } + } + + // Reset state when archive changes + const archiveId = archive?.id + // Note: this useEffect-like cleanup happens via key={archiveId} on the + // Dialog content so React unmounts and remounts; state resets naturally. + + return ( + { if (!v) onClose() }}> + + + + + {archive?.id} + + + Inspect the manifest snapshot taken at backup time, then dry-run the restore plan for a chosen mode. 
Read-only; nothing on this host is changed. + + + + {/* Manifest summary */} + {manifestErr ? ( +
+ Couldn't read the manifest from this archive — it may have been created before the manifest format was added. +
+ ) : !manifest ? ( +
+ + Reading manifest... +
+ ) : ( + + )} + + {/* Preflight controls */} +
+
+
+ + +
+ +
+ + {error && ( +
+ {error} +
+ )} + + {report && } +
+
+
+ ) +} + +// ── Manifest summary panel ─────────────────────────────────── +function ManifestSummary({ + manifest, +}: { + manifest: { + source_host: ManifestSourceHost + proxmenux_installed_components: Array<{ id: string; version_at_backup: string | null }> + vms_lxcs_at_backup: { vms: unknown[]; lxcs: unknown[] } + storage_inventory?: { zfs_pools?: unknown[]; lvm?: { vgs?: unknown[] } } + } +}) { + const sh = manifest.source_host + const zfsCount = manifest.storage_inventory?.zfs_pools?.length ?? 0 + const lvmCount = manifest.storage_inventory?.lvm?.vgs?.length ?? 0 + return ( +
+
+ } label="Source host" value={sh.hostname} /> + + + + + + + + +
+ {manifest.proxmenux_installed_components.length > 0 && ( +
+
ProxMenux components at backup time:
+
+ {manifest.proxmenux_installed_components.map((c) => ( + + {c.id}{c.version_at_backup ? ` @ ${c.version_at_backup}` : ""} + + ))} +
+
+ )} +
+ ) +} + +function Field({ icon, label, value, mono }: { icon?: React.ReactNode; label: string; value: string; mono?: boolean }) { + return ( +
+
+ {icon} + {label} +
+
+ {value} +
+
+ ) +} + +// ── Preflight report view ──────────────────────────────────── +function PreflightReportView({ report }: { report: PreflightReport }) { + const { summary, checks } = report.preflight + const passColor = "text-emerald-500" + const warnColor = "text-amber-500" + const failColor = "text-red-500" + + return ( +
+ {/* Summary line */} +
+ + + {summary.pass} pass + + + + {summary.warn} warn + + + + {summary.fail} fail + + {summary.fail > 0 && ( + + --apply would be refused + + )} +
+ + {/* Per-check list */} +
+ {checks.map((c) => { + const color = + c.severity === "pass" ? passColor : + c.severity === "warn" ? warnColor : + failColor + const Icon = + c.severity === "pass" ? CheckCircle2 : + c.severity === "warn" ? AlertTriangle : + XCircle + return ( +
+ +
+ {c.id} + {c.message} +
+
+ ) + })} +
+ + {/* Storage / network counts */} +
+
+
Storage [in mode: {String(report.storage.in_selected_mode)}]
+
+ {report.storage.zfs.length} ZFS pool(s) · + {" "}{report.storage.lvm.length} LVM VG(s) · + {" "}{report.storage.pve_storage.length} PVE storage(s) +
+
+
+
Network [in mode: {String(report.network.in_selected_mode)}]
+
+ {report.network.keep.length} keep · + {" "}{report.network.remap.length} remap · + {" "}{report.network.orphan.length} orphan · + {" "}{report.network.new.length} new +
+
+
+ + {/* Driver plan */} + {report.driver_reinstall.plan.length > 0 && ( +
+
Driver reinstall plan ({report.driver_reinstall.plan.length})
+
+ {report.driver_reinstall.plan.map((p) => ( +
+ {p.component_id} + {p.action} +
+ ))} +
+
+ )} + + {/* Abort reason (if --apply would have been refused) */} + {report.abort_reason && ( +
+ {report.abort_reason} +
+ )} +
+ ) +} diff --git a/AppImage/components/proxmox-dashboard.tsx b/AppImage/components/proxmox-dashboard.tsx index e69e7140..94186975 100644 --- a/AppImage/components/proxmox-dashboard.tsx +++ b/AppImage/components/proxmox-dashboard.tsx @@ -14,6 +14,7 @@ import { Settings } from "./settings" import { Security } from "./security" import { Profile } from "./profile" import { About } from "./about" +import { HostBackup } from "./host-backup" import { OnboardingCarousel } from "./onboarding-carousel" import { HealthStatusModal } from "./health-status-modal" import { ReleaseNotesModal, useVersionCheck } from "./release-notes-modal" @@ -30,17 +31,26 @@ import { LayoutDashboard, HardDrive, NetworkIcon, - Box, + Boxes, Cpu, - FileText, + ScrollText, SettingsIcon, + Settings2, Terminal, ShieldCheck, Info, + DatabaseBackup, + ChevronDown, } from "lucide-react" import Image from "next/image" import { ThemeToggle } from "./theme-toggle" import { Sheet, SheetContent, SheetTrigger } from "./ui/sheet" +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from "./ui/dropdown-menu" interface SystemStatus { status: "healthy" | "warning" | "critical" @@ -352,28 +362,19 @@ export function ProxmoxDashboard() { const getActiveTabLabel = () => { switch (activeTab) { - case "overview": - return "Overview" - case "storage": - return "Storage" - case "network": - return "Network" - case "vms": - return "VMs & LXCs" - case "hardware": - return "Hardware" - case "terminal": - return "Terminal" - case "logs": - return "System Logs" - case "security": - return "Security" - case "settings": - return "Settings" - case "profile": - return "Profile" - default: - return "Navigation Menu" + case "overview": return "Overview" + case "vms": return "VMs & LXCs" + case "storage": return "Storage" + case "network": return "Network" + case "hardware": return "Hardware" + case "backup": return "Backup" + case "terminal": return "Terminal" + case "logs": return "System 
Logs" + case "security": return "Security" + case "settings": return "Settings" + case "about": return "About" + case "profile": return "Profile" + default: return "Navigation Menu" } } @@ -565,71 +566,128 @@ export function ProxmoxDashboard() { >
- {/* Issue #191: 10 tabs after adding About. The grid wraps via - Tabs primitives so the extra column doesn't push the - triggers off-screen on common laptop widths. */} - - - Overview - - - Storage - - - Network - - - VMs & LXCs - - - Hardware - - - System Logs - - - Terminal - - - Security - - - Settings - - - About - - + {/* Sprint 13D nav redesign — 6 top-level slots in usage order: + Overview · VMs & LXCs · Node ⌄ · Backup · Terminal · Admin ⌄ + Node groups Storage / Network / Hardware (3 sub-items). + Admin groups System Logs / Security / Settings / About + (will split when RBAC arrives in 1.5.0). + Backup is direct now (only Host Backup); becomes a dropdown + when VM/LXC centralised backup ships. */} + {(() => { + const triggerActiveClass = + "data-[state=active]:bg-blue-500 data-[state=active]:text-white data-[state=active]:rounded-md" + // Each dropdown lists its children in the order they + // render. When one of them is the active tab, the dropdown + // trigger swaps its label + icon to that child — same + // pattern macOS Settings uses inside a category: the + // crumb shows where you are, the chevron tells you the + // siblings are one click away. 
+ const NODE_ITEMS = [ + { value: "storage", label: "Storage", Icon: HardDrive, default: false }, + { value: "network", label: "Network", Icon: NetworkIcon, default: false }, + { value: "hardware", label: "Hardware", Icon: Cpu, default: false }, + ] + const ADMIN_ITEMS = [ + { value: "logs", label: "System Logs", Icon: ScrollText, default: false }, + { value: "security", label: "Security", Icon: ShieldCheck, default: false }, + { value: "settings", label: "Settings", Icon: SettingsIcon, default: false }, + { value: "about", label: "About", Icon: Info, default: false }, + ] + const activeNodeItem = NODE_ITEMS.find(i => i.value === activeTab) + const activeAdminItem = ADMIN_ITEMS.find(i => i.value === activeTab) + const isNodeActive = activeNodeItem !== undefined + const isAdminActive = activeAdminItem !== undefined + // The trigger label + icon shown on the bar. When a child + // is active we surface IT; otherwise the group default. + const NodeTriggerIcon = activeNodeItem ? activeNodeItem.Icon : Server + const NodeTriggerLabel = activeNodeItem ? activeNodeItem.label : "Node" + const AdminTriggerIcon = activeAdminItem ? activeAdminItem.Icon : Settings2 + const AdminTriggerLabel = activeAdminItem ? activeAdminItem.label : "Admin" + // Dropdown trigger styling: parity with TabsTrigger so the + // parent visibly carries the "I'm the selected section" + // signal when any of its children is the active tab — + // same blue background + white text + rounded as a direct + // tab. Without this the user lands on Storage and the + // entire top bar looks idle. + const dropdownBtnClass = (active: boolean) => + `inline-flex items-center justify-center whitespace-nowrap px-3 py-1.5 text-sm font-medium ring-offset-background transition-all focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 ${ + active + ? 
"bg-blue-500 text-white rounded-md" + : "text-muted-foreground hover:text-foreground rounded-sm" + }` + + return ( + + {/* Direct: Overview */} + + + Overview + + + {/* Direct: VMs & LXCs — first-class because Proxmox IS + a hypervisor; workloads belong at top level. */} + + + VMs & LXCs + + + {/* Dropdown: Node (Storage / Network / Hardware) */} + + + + {NodeTriggerLabel} + + + + {NODE_ITEMS.map(({ value, label, Icon }) => ( + setActiveTab(value)} + className={activeTab === value ? "bg-blue-500/10 text-blue-500" : ""} + > + + {label} + + ))} + + + + {/* Direct: Backup (today: Host Backup only). When VM/LXC + backup ships this becomes a dropdown. */} + + + Backup + + + {/* Direct: Terminal */} + + + Terminal + + + {/* Dropdown: Admin (System Logs / Security / Settings / About) */} + + + + {AdminTriggerLabel} + + + + {ADMIN_ITEMS.map(({ value, label, Icon }) => ( + setActiveTab(value)} + className={activeTab === value ? "bg-blue-500/10 text-blue-500" : ""} + > + + {label} + + ))} + + + + ) + })()}
@@ -646,158 +704,74 @@ export function ProxmoxDashboard() {
-
- - - - - - - - - - -
+ }` + // Mobile sheet is a flat list (no section headers). + // The desktop layout uses dropdowns to express the + // Node/Admin grouping; here we just enumerate items + // in the same visual order. + return ( +
+ + + + + + + + + + + +
+ ) + })()}
@@ -830,6 +804,10 @@ export function ProxmoxDashboard() { + + + + diff --git a/AppImage/scripts/flask_server.py b/AppImage/scripts/flask_server.py index 14fd329e..94320789 100644 --- a/AppImage/scripts/flask_server.py +++ b/AppImage/scripts/flask_server.py @@ -12128,6 +12128,440 @@ def stream_script_logs(session_id): return jsonify({'success': False, 'error': str(e)}), 500 +# ── Host Backup (Sprint 13D, 1.3.0 preview) ────────────────── +# These endpoints surface the host-backup pipeline implemented in +# scripts/backup_restore/ (collectors + restore tooling). They: +# - list configured scheduled jobs (from /var/lib/proxmenux/backup-jobs/) +# - list backup archives present on disk for local_tar destinations +# - extract the manifest from an archive (uses parse_manifest.sh) +# - run the dry-run preflight report (uses run_restore.sh) +# Mutating actions (run-now, create-job, --apply restore) stay on CLI for +# now — UI surface for those lands later in the 1.3.x cycle. + +_PROXMENUX_SCRIPTS_DIR = '/usr/local/share/proxmenux/scripts' +_BACKUP_JOBS_DIR = '/var/lib/proxmenux/backup-jobs' +_BACKUP_LOG_DIR = '/var/log/proxmenux/backup-jobs' +# Always scan PVE's default dump directory in addition to per-job +# DEST_DIRs — manual backups from backup_host.sh (options 1-6) land +# there without ever creating a job env file. +_BACKUP_DEFAULT_DUMP_DIRS = ('/var/lib/vz/dump',) +# Filenames produced by ProxMenux host backups: +# manual (backup_host.sh line 253): hostcfg--YYYYMMDD_HHMMSS.tar.zst +# scheduled (run_scheduled_backup.sh): -YYYYMMDD_HHMMSS. +# This regex matches both; we then cross-check against the known job_ids +# (everything else, like PVE's vzdump-lxc-*, gets dropped). +_BACKUP_FILENAME_RE = re.compile(r'^([A-Za-z0-9._-]+)-(\d{8}_\d{6})\.tar(\.zst|\.gz)?$') + + +def _parse_job_env(file_path: str) -> dict: + """Parse a /var/lib/proxmenux/backup-jobs/*.env file (shell KEY=value + format with optional quoting) into a Python dict. 
Returns {} on any + I/O or parse error so callers can just .get() with defaults.""" + out: dict = {} + try: + with open(file_path) as f: + for raw in f: + line = raw.strip() + if not line or line.startswith('#') or '=' not in line: + continue + key, val = line.split('=', 1) + key = key.strip() + val = val.strip() + # Strip shell quoting if balanced + if len(val) >= 2 and val[0] == val[-1] and val[0] in ('"', "'"): + val = val[1:-1] + out[key] = val + except OSError: + pass + return out + + +def _collect_backup_scan_dirs(): + """Build the de-duplicated list of directories we scan for host + backup archives: the PVE default(s) plus every local_tar job's + DEST_DIR. Returns directories that actually exist on disk.""" + import glob + dirs = [] + seen = set() + def _add(d): + if d and d not in seen and os.path.isdir(d): + seen.add(d) + dirs.append(d) + for d in _BACKUP_DEFAULT_DUMP_DIRS: + _add(d) + try: + env_files = sorted(glob.glob(f'{_BACKUP_JOBS_DIR}/*.env')) + except OSError: + env_files = [] + for env_file in env_files: + job = _parse_job_env(env_file) + if job.get('METHOD') != 'local_tar': + continue + _add(job.get('DEST_DIR') or job.get('DEST')) + return dirs + + +def _known_job_ids(): + """Set of job_ids that have a .env file on disk — used to associate + a scheduled archive (-.tar*) with its job.""" + import glob + try: + env_files = glob.glob(f'{_BACKUP_JOBS_DIR}/*.env') + except OSError: + return set() + return {os.path.basename(p)[:-len('.env')] for p in env_files} + + +# In-process cache for the tar-peek fallback so we don't re-decompress +# every archive on every Monitor refresh. Keyed by absolute archive +# path; the cached tuple is (size, mtime, is_proxmenux_backup_bool). +# Invalidated automatically whenever size or mtime changes. +_BACKUP_PEEK_CACHE: dict = {} + + +def _read_archive_sidecar(archive_path): + """Read and parse the .proxmenux.json sidecar if present. 
+ Returns the parsed dict on success, or None if the sidecar is + missing or unreadable. A corrupted sidecar drops back to the next + detection path (peek) rather than masking the archive entirely.""" + sidecar = archive_path + '.proxmenux.json' + if not os.path.isfile(sidecar): + return None + try: + with open(sidecar) as f: + data = json.load(f) + except (OSError, json.JSONDecodeError): + return None + return data if isinstance(data, dict) else None + + +def _peek_host_backup_marker(archive_path, st): + """Check whether the archive contains 'metadata/run_info.env' — the + in-tar marker that every ProxMenux host backup ships with. Used as + a fallback when no sidecar is present (legacy archives, or archives + copied in from elsewhere). Result is cached by (size, mtime) so a + second call within the same process is free. + + Implementation: stream `tar -atf` (auto-detect compression by + extension; GNU tar 1.30+) line by line and short-circuit as soon as + we hit the marker. The marker lives in the first ~10-20 entries of + every ProxMenux archive, so we cap the scan at 500 entries — well + above the real archive's TOC depth but bounded enough that a + pathological archive can't keep the worker tied up. 
+ """ + cached = _BACKUP_PEEK_CACHE.get(archive_path) + if cached and cached[0] == st.st_size and cached[1] == int(st.st_mtime): + return cached[2] + + is_pmx = False + proc = None + try: + proc = subprocess.Popen( + ['tar', '-atf', archive_path], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + text=True, + ) + assert proc.stdout is not None + for i, line in enumerate(proc.stdout): + if i > 500: + break + entry = line.strip() + if entry.startswith('./'): + entry = entry[2:] + entry = entry.rstrip('/') + if entry == 'metadata/run_info.env': + is_pmx = True + break + except OSError: + is_pmx = False + finally: + if proc is not None: + try: + proc.kill() + except OSError: + pass + try: + proc.wait(timeout=2) + except (subprocess.TimeoutExpired, OSError): + pass + + _BACKUP_PEEK_CACHE[archive_path] = (st.st_size, int(st.st_mtime), is_pmx) + return is_pmx + + +def _identify_host_backup(archive_path, st, hostname, job_ids): + """Return a dict of {kind, job_id, profile, source_hostname, + detected_via} if this archive is a ProxMenux host backup, or None + if it isn't (or we can't tell). + + Order of confidence (best → worst): + 1. .proxmenux.json sidecar — definitive, written by the + backup script when the archive completes. + 2. Filename matches a known scheduled job_id (.env still on disk). + 3. Filename starts with 'hostcfg-' — the convention for manual + and the recommended convention for scheduled jobs. + 4. Tar-peek for metadata/run_info.env — the universal marker that + every ProxMenux backup carries inside. Caches by mtime/size so + repeat calls are free. 
+ """ + sc = _read_archive_sidecar(archive_path) + if sc is not None: + return { + 'kind': sc.get('kind') or 'manual', + 'job_id': sc.get('job_id'), + 'profile': sc.get('profile'), + 'source_hostname': sc.get('hostname'), + 'detected_via': 'sidecar', + } + + name = os.path.basename(archive_path) + m = _BACKUP_FILENAME_RE.match(name) + stem = m.group(1) if m else None + + if stem and stem in job_ids: + return { + 'kind': 'scheduled', 'job_id': stem, + 'profile': None, 'source_hostname': None, + 'detected_via': 'job_id_match', + } + + if stem == f'hostcfg-{hostname}': + return { + 'kind': 'manual', 'job_id': None, + 'profile': None, 'source_hostname': hostname, + 'detected_via': 'hostcfg_prefix', + } + + if stem and stem.startswith('hostcfg-'): + return { + 'kind': 'manual', 'job_id': None, + 'profile': None, 'source_hostname': None, + 'detected_via': 'hostcfg_prefix', + } + + if _peek_host_backup_marker(archive_path, st): + return { + 'kind': 'legacy', 'job_id': None, + 'profile': None, 'source_hostname': None, + 'detected_via': 'tar_peek', + } + return None + + +def _find_backup_archive_path(archive_id): + """Resolve an archive_id (basename) to an absolute path by checking + every directory we scan for backups (PVE default + per-job DEST_DIRs). + Returns None if the file isn't found anywhere we know about, or if + the resolved file isn't identifiable as a ProxMenux backup. 
This is + a deliberate allow-list: callers can't request arbitrary host paths + via the API even if they hit the inspect/preflight URLs directly.""" + if '/' in archive_id or archive_id in ('.', '..') or archive_id.startswith('.'): + return None # don't let basename traversal sneak through + if not archive_id.endswith(_BACKUP_TAR_SUFFIXES): + return None # we only handle the tar family + hostname = socket.gethostname() + job_ids = _known_job_ids() + for d in _collect_backup_scan_dirs(): + candidate = os.path.join(d, archive_id) + if not os.path.isfile(candidate): + continue + try: + st = os.stat(candidate) + except OSError: + continue + if _identify_host_backup(candidate, st, hostname, job_ids) is None: + continue # exists but isn't a ProxMenux backup — reject + return candidate + return None + + +@app.route('/api/host-backups/jobs', methods=['GET']) +@require_auth +def api_host_backups_jobs(): + """List scheduled host-backup jobs created via the backup_scheduler + CLI. Each job has a .env file + systemd timer. 
We report on both, + plus the last-run status when available.""" + import glob + jobs: list = [] + try: + env_files = sorted(glob.glob(f'{_BACKUP_JOBS_DIR}/*.env')) + except OSError: + env_files = [] + + for env_file in env_files: + job_id = os.path.basename(env_file)[:-len('.env')] + job = _parse_job_env(env_file) + + timer_unit = f'proxmenux-backup-{job_id}.timer' + timer_enabled = subprocess.run( + ['systemctl', 'is-enabled', '--quiet', timer_unit], + capture_output=True + ).returncode == 0 + + last_status = None + last_status_file = f'{_BACKUP_LOG_DIR}/{job_id}-last.status' + if os.path.exists(last_status_file): + try: + with open(last_status_file) as f: + last_status = f.read().strip() + except OSError: + pass + + # Next scheduled run from systemctl list-timers + next_run = None + try: + r = subprocess.run( + ['systemctl', 'list-timers', '--no-pager', '--output=json', timer_unit], + capture_output=True, text=True, timeout=5 + ) + if r.returncode == 0 and r.stdout.strip(): + rows = json.loads(r.stdout) + if rows and isinstance(rows, list): + next_run = rows[0].get('next') + except (subprocess.TimeoutExpired, json.JSONDecodeError, ValueError, OSError): + pass + + jobs.append({ + 'id': job_id, + 'destination': (job.get('DEST_DIR') or job.get('DEST') + or job.get('PBS_REPO') or job.get('BORG_REPO') or ''), + 'method': job.get('METHOD') or 'unknown', + 'on_calendar': job.get('ON_CALENDAR') or 'manual', + 'retention': job.get('RETENTION') or '', + 'timer_enabled': timer_enabled, + 'last_status': last_status, + 'next_run': next_run, + }) + return jsonify({'jobs': jobs}) + + +_BACKUP_TAR_SUFFIXES = ('.tar', '.tar.zst', '.tar.gz') + + +@app.route('/api/host-backups/archives', methods=['GET']) +@require_auth +def api_host_backups_archives(): + """List ProxMenux host-backup archives found on disk. + + Scans /var/lib/vz/dump (PVE default — covers manual backups from + backup_host.sh options 1-6) plus every DEST_DIR registered by a + local_tar scheduled job. 
For each archive, _identify_host_backup() + decides whether it's really a ProxMenux backup using, in order of + confidence: (a) the .proxmenux.json sidecar dropped by the backup + scripts at completion (definitive — survives any future rename of + the .tar); (b) the filename conventions (`hostcfg--` for + manual, `-` for scheduled with the job env still on + disk); (c) a tar-peek for the in-archive `metadata/run_info.env` + marker that every ProxMenux backup ships with (catches legacy + archives and ones copied in from another host). + PBS and Borg backups aren't surfaced in the UI yet.""" + archives: list = [] + seen: set = set() + hostname = socket.gethostname() + job_ids = _known_job_ids() + + for d in _collect_backup_scan_dirs(): + try: + entries = os.listdir(d) + except OSError: + continue + for name in entries: + if not name.endswith(_BACKUP_TAR_SUFFIXES): + continue + tar_path = os.path.join(d, name) + if tar_path in seen: + continue + seen.add(tar_path) + try: + st = os.stat(tar_path) + except OSError: + continue + info = _identify_host_backup(tar_path, st, hostname, job_ids) + if info is None: + continue + archives.append({ + 'id': name, + 'path': tar_path, + 'size_bytes': st.st_size, + 'mtime': int(st.st_mtime), + **info, + }) + + archives.sort(key=lambda a: a['mtime'], reverse=True) + return jsonify({'archives': archives})
+
+
+@app.route('/api/host-backups/archives/<archive_id>/manifest', methods=['GET'])
+@require_auth
+def api_host_backups_archive_manifest(archive_id):
+    """Return the manifest.json embedded in a backup archive.
+
+    The archive is resolved with _find_backup_archive_path(archive_id)
+    and handed to scripts/backup_restore/restore/parse_manifest.sh;
+    whatever JSON the script prints on stdout is returned as-is (the
+    unwrapped manifest, i.e. without the proxmenux_backup_manifest key).
+
+    Responses: 200 with the parsed JSON; 404 when the archive cannot be
+    resolved; 503 when the parser script is not installed; 422 when the
+    script exits non-zero (its stderr becomes the error text); 500 when
+    the script cannot be started, exceeds the 30 s timeout, or prints
+    something that is not valid JSON."""
+    archive_path = _find_backup_archive_path(archive_id)
+    if not archive_path:
+        return jsonify({'error': 'archive not found'}), 404
+
+    parse_script = f'{_PROXMENUX_SCRIPTS_DIR}/backup_restore/restore/parse_manifest.sh'
+    if not os.path.exists(parse_script):
+        return jsonify({'error': 'restore tooling not installed on this host',
+                        'install_hint': 'Run the ProxMenux installer to deploy scripts/backup_restore/'}), 503
+
+    try:
+        # Argument list (no shell=True): archive_path is passed as a
+        # single argv entry and never interpreted by a shell.
+        r = subprocess.run(['bash', parse_script, archive_path],
+                           capture_output=True, text=True, timeout=30)
+    except (subprocess.TimeoutExpired, OSError) as e:
+        return jsonify({'error': f'parser invocation failed: {e}'}), 500
+
+    if r.returncode != 0:
+        return jsonify({'error': r.stderr.strip() or 'parse_manifest exited non-zero'}), 422
+
+    try:
+        return jsonify(json.loads(r.stdout))
+    except json.JSONDecodeError:
+        return jsonify({'error': 'parser output was not valid JSON'}), 500
+
+
+@app.route('/api/host-backups/archives/<archive_id>/preflight', methods=['POST'])
+@require_auth
+def api_host_backups_archive_preflight(archive_id):
+    """Run the dry-run preflight + storage + network + driver-plan report
+    for this archive against the current host.
+
+    Body: {"mode": "<mode>"}; the key is optional and defaults to
+    "full". Accepted modes (matching restore_modes.sh): full,
+    storage_only, network_only, base, custom. A missing, unparsable or
+    non-object JSON body is treated as an empty object.
+
+    Responses: 200 with the combined run_restore.sh JSON report (also
+    when the script exits non-zero, as long as it printed a report);
+    400 for an unknown mode; 404 when the archive cannot be resolved;
+    503 when run_restore.sh is not installed; 500 when the script
+    cannot be started, exceeds the 120 s timeout, prints nothing, or
+    prints something that is not valid JSON."""
+    archive_path = _find_backup_archive_path(archive_id)
+    if not archive_path:
+        return jsonify({'error': 'archive not found'}), 404
+
+    body = request.get_json(silent=True)
+    if not isinstance(body, dict):
+        # A JSON array/string/number body has no .get(); fall back to
+        # the defaults instead of raising AttributeError (HTTP 500).
+        body = {}
+    mode = body.get('mode', 'full')
+    if mode not in ('full', 'storage_only', 'network_only', 'base', 'custom'):
+        return jsonify({'error': f'unknown mode "{mode}"'}), 400
+
+    run_script = f'{_PROXMENUX_SCRIPTS_DIR}/backup_restore/restore/run_restore.sh'
+    if not os.path.exists(run_script):
+        return jsonify({'error': 'restore tooling not installed on this host',
+                        'install_hint': 'Run the ProxMenux installer to deploy scripts/backup_restore/'}), 503
+
+    try:
+        r = subprocess.run(
+            ['bash', run_script, archive_path, '--mode', mode, '--json'],
+            capture_output=True, text=True, timeout=120
+        )
+    except (subprocess.TimeoutExpired, OSError) as e:
+        return jsonify({'error': f'preflight invocation failed: {e}'}), 500
+
+    # run_restore.sh exits non-zero when preflight has failures; we still
+    # want to surface the report so the UI can show what failed. The
+    # exit code is therefore deliberately not checked here.
+    if not r.stdout.strip():
+        return jsonify({'error': r.stderr.strip() or 'no report emitted'}), 500
+    try:
+        return jsonify(json.loads(r.stdout))
+    except json.JSONDecodeError:
+        return jsonify({'error': 'run_restore output was not valid JSON',
+                        'raw_stderr': r.stderr[:2000]}), 500
+
+
 if __name__ == '__main__': import sys import logging diff --git a/install_proxmenux.sh b/install_proxmenux.sh index 22212201..c76a4f38 100755 --- a/install_proxmenux.sh +++ b/install_proxmenux.sh @@ -921,7 +921,13 @@ install_normal_version() { show_progress $current_step $total_steps "Copying necessary files" cp "./scripts/utils.sh" "$UTILS_FILE" - cp "./menu" "$INSTALL_DIR/$MENU_SCRIPT" + # Atomic install of /usr/local/bin/menu: stage to .new on the same + # filesystem then mv. 
This protects any reader that happens to open + # the file mid-install from seeing a partial/half-written script + # (the suspected root cause of the post-1.2.2-update reports: + # "menu: line 138 syntax error near unexpected token `$REMOTE_VERSION`") + cp "./menu" "$INSTALL_DIR/${MENU_SCRIPT}.new" + mv -f "$INSTALL_DIR/${MENU_SCRIPT}.new" "$INSTALL_DIR/$MENU_SCRIPT" cp "./version.txt" "$LOCAL_VERSION_FILE" cp "./install_proxmenux.sh" "$BASE_DIR/install_proxmenux.sh" @@ -1078,7 +1084,13 @@ install_translation_version() { msg_ok "Cache file copied with translations." cp "./scripts/utils.sh" "$UTILS_FILE" - cp "./menu" "$INSTALL_DIR/$MENU_SCRIPT" + # Atomic install of /usr/local/bin/menu: stage to .new on the same + # filesystem then mv. This protects any reader that happens to open + # the file mid-install from seeing a partial/half-written script + # (the suspected root cause of the post-1.2.2-update reports: + # "menu: line 138 syntax error near unexpected token `$REMOTE_VERSION`") + cp "./menu" "$INSTALL_DIR/${MENU_SCRIPT}.new" + mv -f "$INSTALL_DIR/${MENU_SCRIPT}.new" "$INSTALL_DIR/$MENU_SCRIPT" cp "./version.txt" "$LOCAL_VERSION_FILE" cp "./install_proxmenux.sh" "$BASE_DIR/install_proxmenux.sh" @@ -1177,24 +1189,47 @@ show_installation_options() { } install_proxmenux() { - show_installation_options - - case "$INSTALL_TYPE" in - "1") + if [[ "${UPDATE_MODE:-0}" == "1" ]]; then + # Update path: the user already accepted "Update now?" in the + # menu. We skip the install-type chooser (their choice is + # preserved — Translation installs leave /opt/googletrans-env + # behind, Normal installs don't) and label the run as an + # "Update" instead of an "Install" so the operator can tell + # which flow they're in. 
The continuous hand-off back to the + # new menu at the end of this function (exec, see below) + # closes the entire class of bugs of shape + # "menu: line N syntax error" post-update + # because no shell ever returns to a half-written + # /usr/local/bin/menu — the new copy is the only thing parsed. + if [[ -d "$VENV_PATH" && -f "$VENV_PATH/bin/activate" ]]; then show_proxmenux_logo - msg_title "Installing ProxMenux - Normal Version" - install_normal_version - ;; - "2") - show_proxmenux_logo - msg_title "Installing ProxMenux - Translation Version" + msg_title "Updating ProxMenux - Translation Version" install_translation_version - ;; - *) - msg_error "Invalid option selected." - exit 1 - ;; - esac + else + show_proxmenux_logo + msg_title "Updating ProxMenux - Normal Version" + install_normal_version + fi + else + show_installation_options + + case "$INSTALL_TYPE" in + "1") + show_proxmenux_logo + msg_title "Installing ProxMenux - Normal Version" + install_normal_version + ;; + "2") + show_proxmenux_logo + msg_title "Installing ProxMenux - Translation Version" + install_translation_version + ;; + *) + msg_error "Invalid option selected." + exit 1 + ;; + esac + fi if [[ -f "$UTILS_FILE" ]]; then source "$UTILS_FILE" @@ -1210,14 +1245,24 @@ install_proxmenux() { bash "$LOCAL_SCRIPTS/global/cleanup_gpu_hookscripts.sh" || true fi + if [[ "${UPDATE_MODE:-0}" == "1" ]]; then + msg_ok "ProxMenux update complete — relaunching menu..." + # Hand off to the freshly-installed menu binary. `exec` replaces + # this shell so nothing tries to keep parsing the install script + # afterwards, and there is zero time window where any process + # could read a half-rewritten /usr/local/bin/menu (already + # protected by the atomic mv above; this is belt-and-suspenders). 
+ exec "$INSTALL_DIR/$MENU_SCRIPT" + fi + msg_title "ProxMenux has been installed successfully" - + if systemctl is-active --quiet proxmenux-monitor.service; then local server_ip=$(get_server_ip) echo -e "${GN}🌐 ProxMenux Monitor activated${CL}: ${BL}http://${server_ip}:${MONITOR_PORT}${CL}" echo fi - + echo -ne "${GN}" type_text "To run ProxMenux, simply execute this command in the console or terminal:" echo -e "${YWB} menu${CL}" @@ -1226,6 +1271,12 @@ install_proxmenux() { exit 0 } +# Parse CLI flags before anything else so install_proxmenux() can +# branch on UPDATE_MODE without re-reading "$@". +if [[ "${1:-}" == "--update" ]]; then + UPDATE_MODE=1 +fi + if [ "$(id -u)" -ne 0 ]; then msg_error "This script must be run as root." exit 1 diff --git a/install_proxmenux_beta.sh b/install_proxmenux_beta.sh index 0a2fa075..29d20383 100644 --- a/install_proxmenux_beta.sh +++ b/install_proxmenux_beta.sh @@ -623,7 +623,11 @@ install_beta() { mkdir -p "$BASE_DIR/oci" cp "./scripts/utils.sh" "$UTILS_FILE" - cp "./menu" "$INSTALL_DIR/$MENU_SCRIPT" + # Atomic install of /usr/local/bin/menu — see install_proxmenux.sh + # for the rationale (prevents partial-file reads during mid-update + # parsing). + cp "./menu" "$INSTALL_DIR/${MENU_SCRIPT}.new" + mv -f "$INSTALL_DIR/${MENU_SCRIPT}.new" "$INSTALL_DIR/$MENU_SCRIPT" cp "./version.txt" "$LOCAL_VERSION_FILE" 2>/dev/null || true # Store beta version marker @@ -698,6 +702,14 @@ check_stable_available() { } # ── Entry point ──────────────────────────────────────────── +# Parse --update before any work so the welcome banner can be skipped +# and the relaunch hand-off can fire at the end. The flag arrives from +# `menu`'s check_updates_beta() → `exec bash $INSTALL_SCRIPT --update`. 
+UPDATE_MODE=0 +if [[ "${1:-}" == "--update" ]]; then + UPDATE_MODE=1 +fi + if [ "$(id -u)" -ne 0 ]; then echo -e "${RD}[ERROR] This script must be run as root.${CL}" exit 1 @@ -705,9 +717,13 @@ fi cleanup_corrupted_files show_proxmenux_logo -show_beta_welcome -msg_title "Installing ProxMenux Beta — branch: develop" +if [[ "$UPDATE_MODE" == "1" ]]; then + msg_title "Updating ProxMenux Beta — branch: develop" +else + show_beta_welcome + msg_title "Installing ProxMenux Beta — branch: develop" +fi install_beta # Load utils if available @@ -723,6 +739,14 @@ if [ -x "$BASE_DIR/scripts/global/cleanup_gpu_hookscripts.sh" ]; then bash "$BASE_DIR/scripts/global/cleanup_gpu_hookscripts.sh" || true fi +# Update path: hand off to the freshly-installed menu instead of telling +# the operator to type `menu` again. See install_proxmenux.sh for the +# full rationale — same fix here for the beta channel. +if [[ "$UPDATE_MODE" == "1" ]]; then + msg_ok "ProxMenux Beta update complete — relaunching menu..." + exec "$INSTALL_DIR/$MENU_SCRIPT" +fi + msg_title "ProxMenux Beta installed successfully" if systemctl is-active --quiet proxmenux-monitor.service; then diff --git a/menu b/menu index c6184ea0..2c5796be 100644 --- a/menu +++ b/menu @@ -135,8 +135,22 @@ check_updates_stable() { [[ -z "$LOCAL_VERSION" ]] && return 0 [[ "$LOCAL_VERSION" = "$REMOTE_VERSION" ]] && return 0 - if whiptail --title "$(translate 'Update Available')" \ - --yesno "$(translate 'New version available') ($REMOTE_VERSION)\n\n$(translate 'Do you want to update now?')" \ + # Extract the translated prompt strings into variables FIRST so the + # whiptail line below is trivially parseable. 
A user on the 1.2.2 + # update path hit: + # menu: line 138: syntax error near unexpected token `$REMOTE_VERSION' + # The original inline form was technically valid bash, but a + # translate() return that contains a stray quote or paren is enough + # to confuse a partially-rewritten file (race during update) or a + # corrupted download. Splitting the strings out closes the entire + # parsing-risk surface for zero behavioural change. + local PROMPT_TITLE PROMPT_AVAIL PROMPT_ASK + PROMPT_TITLE="$(translate 'Update Available')" + PROMPT_AVAIL="$(translate 'New version available')" + PROMPT_ASK="$(translate 'Do you want to update now?')" + + if whiptail --title "$PROMPT_TITLE" \ + --yesno "$PROMPT_AVAIL ($REMOTE_VERSION)\n\n$PROMPT_ASK" \ 10 60 --defaultno; then msg_warn "$(translate 'Starting ProxMenux update...')" diff --git a/scripts/backup_restore/apply_cluster_postboot.sh b/scripts/backup_restore/apply_cluster_postboot.sh new file mode 100755 index 00000000..d81b1632 --- /dev/null +++ b/scripts/backup_restore/apply_cluster_postboot.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# ========================================================== +# ProxMenux - Apply Cluster Configs (post-boot) +# ========================================================== +# Fires AFTER pve-cluster.service is up, when /etc/pve is +# the live pmxcfs FUSE mount. We can write individual files +# to /etc/pve at this point and they propagate through the +# cluster filesystem normally — no need to stop pve-cluster +# (which would be unsafe at this stage of boot). +# +# Trigger: apply_pending_restore.sh writes a marker file at +# /var/lib/proxmenux/cluster-apply-pending whose contents is +# the absolute path of the recovery dir containing the +# extracted /etc/pve content. The systemd unit has +# ConditionPathExists=, so on a normal boot (no +# marker), the unit short-circuits and does nothing. 
+
+# Unset variables expand to empty instead of aborting the script; the
+# defaults assigned before sourcing the marker cover the keys read below.
+set +u
+
+MARKER="${PMX_CLUSTER_APPLY_MARKER:-/var/lib/proxmenux/cluster-apply-pending}"
+LOG_DIR="${PMX_LOG_DIR:-/var/log/proxmenux}"
+
+mkdir -p "$LOG_DIR" >/dev/null 2>&1 || true
+LOG_FILE="${LOG_DIR}/proxmenux-cluster-postboot-$(date +%Y%m%d_%H%M%S).log"
+# From here on every stdout/stderr write of this script (and of the
+# commands it runs) is appended to the per-run log file.
+exec >>"$LOG_FILE" 2>&1
+
+echo "=== ProxMenux cluster post-boot apply at $(date -Iseconds) ==="
+
+if [[ ! -f "$MARKER" ]]; then
+    echo "No marker found at $MARKER — nothing to apply."
+    exit 0
+fi
+
+# Marker is env-style key=value, written by apply_pending_restore.sh.
+# Defaults so a malformed marker still gives us safe behaviour.
+# NOTE(review): `source` executes the marker as shell code, so values
+# containing spaces or shell metacharacters would not round-trip —
+# confirm the writer only ever emits plain paths and 0/1 flags.
+RECOVERY_ROOT=""
+PENDING_DIR=""
+NEEDS_INITRAMFS=0
+NEEDS_GRUB=0
+# shellcheck source=/dev/null
+source "$MARKER"
+echo "Recovery root: $RECOVERY_ROOT"
+echo "Pending dir: $PENDING_DIR"
+echo "Needs initramfs: $NEEDS_INITRAMFS"
+echo "Needs grub: $NEEDS_GRUB"
+
+# Both early exits below also delete the marker, so an unusable
+# recovery dir is not retried on later runs.
+if [[ -z "$RECOVERY_ROOT" || ! -d "$RECOVERY_ROOT" ]]; then
+    echo "Recovery root invalid — aborting cleanly."
+    rm -f "$MARKER"
+    exit 0
+fi
+
+SOURCE_PVE="$RECOVERY_ROOT/etc/pve"
+if [[ ! -d "$SOURCE_PVE" ]]; then
+    echo "No /etc/pve content in recovery dir — nothing to do."
+    rm -f "$MARKER"
+    exit 0
+fi
+
+# Wait for pmxcfs to be fully writable. The After=pve-cluster.service
+# in our unit gets us past the service-start point, but on slow boots
+# the FUSE mount can take a few extra seconds to settle.
+# Writability is probed by creating and deleting a PID-suffixed file,
+# at most 60 times with a 1 s sleep between attempts.
+# NOTE(review): when all 60 attempts fail the loop simply ends and the
+# script carries on; the copies below would then be expected to fail
+# while the marker is still removed later — confirm this is intended.
+echo "Waiting for /etc/pve to be writable..."
+for i in {1..60}; do
+    if [[ -d /etc/pve ]] \
+        && touch "/etc/pve/.proxmenux-test-$$" 2>/dev/null; then
+        rm -f "/etc/pve/.proxmenux-test-$$" 2>/dev/null
+        echo "/etc/pve writable after ${i}s"
+        break
+    fi
+    sleep 1
+done
+
+# ── Detect source node name for cross-host node rename ────
+# The source backup's node dir is whatever the source host
+# was called; we copy its contents into THIS host's node
+# dir. Two sources for the source hostname, in order of
+# preference:
+#   1. metadata/run_info.env from the pending dir (definitive)
+#   2. The first (and usually only) dir under nodes/ in the
+#      source backup — works when metadata is missing
+# For (1) the first run_info.env found within 3 levels of PENDING_DIR
+# is used and its first `hostname=` line is taken, whitespace removed.
+# For (2) `head -1` keeps whichever directory `find` lists first; with
+# several node dirs that choice is not otherwise controlled here.
+SRC_NODE=""
+if [[ -n "$PENDING_DIR" ]]; then
+    META_RUN_INFO=$(find "$PENDING_DIR" -maxdepth 3 -name run_info.env 2>/dev/null | head -1)
+    if [[ -n "$META_RUN_INFO" && -f "$META_RUN_INFO" ]]; then
+        SRC_NODE=$(grep -m1 '^hostname=' "$META_RUN_INFO" 2>/dev/null | cut -d= -f2- | tr -d '[:space:]')
+    fi
+fi
+if [[ -z "$SRC_NODE" && -d "$SOURCE_PVE/nodes" ]]; then
+    SRC_NODE=$(find "$SOURCE_PVE/nodes" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | head -1)
+    SRC_NODE=$(basename "$SRC_NODE" 2>/dev/null)
+fi
+# NOTE(review): presumably `hostname` matches this host's directory
+# name under /etc/pve/nodes — confirm on hosts whose hostname is an FQDN.
+CUR_NODE=$(hostname)
+echo "Source node: ${SRC_NODE:-(unknown)} / Current node: ${CUR_NODE}"
+
+# ── Apply EVERY top-level file in /etc/pve ────────────────
+# Anything that's a regular file at the root of /etc/pve
+# (datacenter.cfg, storage.cfg, user.cfg, domains.cfg,
+# vzdump.cron, jobs.cfg, replication.cfg, ceph.conf,
+# corosync.conf if cluster, etc). pmxcfs symlinks like
+# /etc/pve/local, /etc/pve/lxc, /etc/pve/qemu-server,
+# /etc/pve/openvz are auto-created by pmxcfs and we skip
+# them — copying over them throws "Operation not permitted".
+# The glob does not match dotfiles, and only entries passing `-f`
+# are copied; each result is logged and successes are counted.
+echo ""
+echo "── Global config files ──"
+copied_global=0
+PMX_SYMLINKS_SKIP="local lxc qemu-server openvz"
+for src in "$SOURCE_PVE"/*; do
+    [[ -f "$src" ]] || continue
+    name=$(basename "$src")
+    # Skip files that mirror pmxcfs symlinks
+    skip=0
+    for s in $PMX_SYMLINKS_SKIP; do
+        [[ "$name" == "$s" ]] && { skip=1; break; }
+    done
+    (( skip )) && continue
+    # cp's own error text is kept (2>&1) so it lands in the log file.
+    if cp -f "$src" "/etc/pve/$name" 2>&1; then
+        echo " ✓ $name"
+        ((copied_global++))
+    else
+        echo " ✗ $name (cp failed)"
+    fi
+done
+
+# ── Subdirectories we want to preserve verbatim ───────────
+# Each gets contents copied flat (no recursive dir copy of
+# symlinks). These are the "shared cluster state" dirs. 
+echo ""
+echo "── Cluster subdirectories ──"
+# For each listed subdirectory present in the recovery copy, walk it
+# with `find`, recreate directories under /etc/pve and copy files one
+# by one. copied_subdirs counts files whose cp succeeded.
+copied_subdirs=0
+for subdir in firewall sdn mapping virtual-guest priv ha; do
+    src_dir="$SOURCE_PVE/$subdir"
+    [[ -d "$src_dir" ]] || continue
+    mkdir -p "/etc/pve/$subdir" 2>/dev/null || true
+    while IFS= read -r f; do
+        # Path of the entry relative to the subdirectory root.
+        rel="${f#"$src_dir"/}"
+        dst="/etc/pve/$subdir/$rel"
+        if [[ -d "$f" ]]; then
+            mkdir -p "$dst" 2>/dev/null || true
+        elif [[ -f "$f" ]]; then
+            mkdir -p "$(dirname "$dst")" 2>/dev/null || true
+            cp -f "$f" "$dst" 2>/dev/null && ((copied_subdirs++))
+        fi
+    done < <(find "$src_dir" -mindepth 1 2>/dev/null)
+    # NOTE(review): this line is printed unconditionally; individual cp
+    # failures above are silenced and only show up as a lower counter.
+    echo " ✓ $subdir/ (subtree)"
+done
+
+# ── Apply guest configs into THIS node's dir ──────────────
+# This is the bit that makes `pct list` / `qm list` show
+# the restored guests. We deliberately copy from the
+# source's node dir into the current host's node dir, so
+# cross-host restores Just Work without renaming anything.
+# A guest config whose <vmid>.conf already exists in the destination
+# is never overwritten; it is logged and counted in skipped_guests.
+echo ""
+echo "── Guest configs (LXC + QEMU) ──"
+copied_guests=0
+skipped_guests=0
+if [[ -n "$SRC_NODE" ]] && [[ -d "$SOURCE_PVE/nodes/$SRC_NODE" ]]; then
+    for kind in lxc qemu-server; do
+        src_dir="$SOURCE_PVE/nodes/$SRC_NODE/$kind"
+        dst_dir="/etc/pve/nodes/$CUR_NODE/$kind"
+        [[ -d "$src_dir" ]] || continue
+        mkdir -p "$dst_dir" 2>/dev/null || true
+        for conf in "$src_dir"/*.conf; do
+            # Also filters the literal, unexpanded pattern when no
+            # .conf file exists.
+            [[ -f "$conf" ]] || continue
+            vmid=$(basename "$conf" .conf)
+            if [[ -e "$dst_dir/$vmid.conf" ]]; then
+                echo " ⚠ $kind/$vmid.conf already exists on this host — skipping (avoid clash)"
+                ((skipped_guests++))
+                continue
+            fi
+            if cp -f "$conf" "$dst_dir/$vmid.conf" 2>&1; then
+                echo " ✓ $kind/$vmid.conf"
+                ((copied_guests++))
+            else
+                echo " ✗ $kind/$vmid.conf (cp failed)"
+            fi
+        done
+    done
+else
+    echo " (no source node dir to copy from)"
+fi
+
+# ── Done with cluster config apply ─────────────────────────
+echo ""
+echo "Cluster summary: globals=$copied_global, subdirs=$copied_subdirs, guests=$copied_guests, guest-clashes-skipped=$skipped_guests"
+
+# Remove the marker NOW (before the slow maintenance step
+# below) so if the operator reboots mid-maintenance, we
+# don't redo the (idempotent but wasteful) cluster apply.
+# Maintenance below is also idempotent on re-run but takes
+# 10+ min, so we'd rather not repeat it either — see the
+# marker handling in the maintenance block.
+# NOTE(review): the marker is removed regardless of how many copies
+# above succeeded, so a run where every cp failed is not retried.
+rm -f "$MARKER"
+
+# ── Post-restore maintenance (slow, deferrable) ────────────
+# After a host-config restore, we need to:
+#   - update-initramfs -u -k all → so /etc/modules /etc/modprobe.d
+#     /etc/initramfs-tools changes get baked into the initramfs
+#     of every installed kernel for the NEXT boot.
+#   - update-grub → so /etc/default/grub changes land in
+#     /boot/grub/grub.cfg for the NEXT boot.
+#
+# These are EXPENSIVE (initramfs build per kernel × 3 = 5-10 min;
+# grub a few seconds) but the user's system is already fully up
+# at this point: they can SSH in, use PVE, do anything — these
+# run in the background and finish whenever they finish. The
+# unit's TimeoutStartSec=900 (set in apply_pending_restore.sh)
+# gives us a 15-min cushion. We log progress to the same log
+# file so the operator can `tail -f` if curious.
+echo ""
+echo "── Post-restore maintenance ──"
+# Only do these if the apply_pending_restore.sh's path-trigger
+# analysis said they're needed. On a restore that didn't touch
+# /etc/modules /etc/default/grub etc., both flags are 0 and we
+# skip the slow rebuild entirely. 
+# Breadcrumb file that exists only while the maintenance below runs:
+# written here when at least one step is needed, deleted right after
+# this section. Nothing else in this script reads it back.
+MAINT_MARKER="/var/lib/proxmenux/post-restore-maintenance-pending"
+if [[ "$NEEDS_INITRAMFS" == "1" ]] || [[ "$NEEDS_GRUB" == "1" ]]; then
+    mkdir -p /var/lib/proxmenux >/dev/null 2>&1 || true
+    printf 'started: %s\n' "$(date -Iseconds)" > "$MAINT_MARKER"
+fi
+
+# Both steps pipe their output through `tail` to keep the log short.
+# A pipeline's exit status is that of its LAST command (tail, which
+# practically always succeeds), so the tool's own status is read from
+# PIPESTATUS[0]; otherwise a failed rebuild would be logged as "done".
+if [[ "$NEEDS_INITRAMFS" == "1" ]] && command -v update-initramfs >/dev/null 2>&1; then
+    echo "Running: update-initramfs -u -k all (5-10 min — restore touched initramfs inputs)"
+    if update-initramfs -u -k all 2>&1 | tail -10; [[ ${PIPESTATUS[0]} -eq 0 ]]; then
+        echo " ✓ update-initramfs done"
+    else
+        echo " ✗ update-initramfs failed (system still boots; re-run manually)"
+    fi
+else
+    # Also reached when the flag is set but the tool is not installed.
+    echo "Skipping update-initramfs (restore didn't touch modules/initramfs-tools/crypttab)"
+fi
+
+if [[ "$NEEDS_GRUB" == "1" ]] && command -v update-grub >/dev/null 2>&1; then
+    echo "Running: update-grub"
+    if update-grub 2>&1 | tail -3; [[ ${PIPESTATUS[0]} -eq 0 ]]; then
+        echo " ✓ update-grub done"
+    else
+        echo " ✗ update-grub failed (re-run manually)"
+    fi
+else
+    # Also reached when the flag is set but the tool is not installed.
+    echo "Skipping update-grub (restore didn't touch /etc/default/grub or /etc/kernel)"
+fi
+
+# Clean up the maintenance marker now that we're done. 
+rm -f "$MAINT_MARKER" + +echo "" +echo "=== Apply finished at $(date -Iseconds) ===" +echo "Log: $LOG_FILE" diff --git a/scripts/backup_restore/apply_pending_restore.sh b/scripts/backup_restore/apply_pending_restore.sh index 2e756be8..4ee3800d 100644 --- a/scripts/backup_restore/apply_pending_restore.sh +++ b/scripts/backup_restore/apply_pending_restore.sh @@ -7,8 +7,8 @@ PENDING_BASE="${PMX_RESTORE_PENDING_BASE:-/var/lib/proxmenux/restore-pending}" CURRENT_LINK="${PENDING_BASE}/current" LOG_DIR="${PMX_RESTORE_LOG_DIR:-/var/log/proxmenux}" DEST_PREFIX="${PMX_RESTORE_DEST_PREFIX:-/}" -PRE_BACKUP_BASE="${PMX_RESTORE_PRE_BACKUP_BASE:-/root/proxmenux-pre-restore}" -RECOVERY_BASE="${PMX_RESTORE_RECOVERY_BASE:-/root/proxmenux-recovery}" +PRE_BACKUP_BASE="${PMX_RESTORE_PRE_BACKUP_BASE:-/var/lib/proxmenux/pre-restore}" +RECOVERY_BASE="${PMX_RESTORE_RECOVERY_BASE:-/var/lib/proxmenux/recovery}" mkdir -p "$LOG_DIR" "$PENDING_BASE/completed" >/dev/null 2>&1 || true LOG_FILE="${LOG_DIR}/proxmenux-restore-onboot-$(date +%Y%m%d_%H%M%S).log" @@ -70,7 +70,14 @@ while IFS= read -r rel; do continue fi - # Never restore cluster virtual filesystem data live. + # Cluster data (/etc/pve, /var/lib/pve-cluster) goes into a + # recovery dir for forensics/rollback, but unlike the live- + # menu apply path we ALSO apply it for real here: at this + # point in boot we're before networking.service, nothing is + # talking to the cluster yet, so a `systemctl stop pve-cluster` + # → copy → `systemctl start pve-cluster` is safe. This is the + # whole reason the operator picked "schedule remaining for + # next boot" instead of doing it live from SSH. 
if [[ "$rel" == etc/pve* ]] || [[ "$rel" == var/lib/pve-cluster* ]]; then if [[ -z "$cluster_recovery_root" ]]; then cluster_recovery_root="${RECOVERY_BASE}/$(date +%Y%m%d_%H%M%S)-onboot" @@ -78,6 +85,10 @@ while IFS= read -r rel; do fi mkdir -p "$cluster_recovery_root/$(dirname "$rel")" >/dev/null 2>&1 || true cp -a "$src" "$cluster_recovery_root/$rel" >/dev/null 2>&1 || true + # Mark that we need to do the live apply at the end of + # the loop (we don't want to stop/start pve-cluster + # per-file — once is enough). + cluster_live_apply=1 ((skipped++)) continue fi @@ -113,8 +124,13 @@ while IFS= read -r rel; do done <"$APPLY_LIST" systemctl daemon-reload >/dev/null 2>&1 || true -command -v update-initramfs >/dev/null 2>&1 && update-initramfs -u -k all >/dev/null 2>&1 || true -command -v update-grub >/dev/null 2>&1 && update-grub >/dev/null 2>&1 || true + +# `update-initramfs -u -k all` and `update-grub` used to live here +# but: (a) they take 5-10 minutes for 3 kernels, hanging early-boot +# for that long, and (b) ifupdown2 was waiting on us. They now run +# AFTER pve-cluster is up via the apply_cluster_postboot.sh script +# we hook below, in the background where the user is already on the +# login prompt and using the system. Zero manual steps needed. echo "Applied: $applied" echo "Skipped: $skipped" @@ -122,6 +138,8 @@ echo "Failed: $failed" echo "Backup before restore: $backup_root" if [[ -n "$cluster_recovery_root" ]]; then + # Always write the manual-helper script first — that's the + # rollback path if the live apply below blows up. helper="${cluster_recovery_root}/apply-cluster-restore.sh" cat > "$helper" </dev/null 2>&1 || true + { + printf 'RECOVERY_ROOT=%s\n' "$cluster_recovery_root" + printf 'PENDING_DIR=%s\n' "$PENDING_DIR" + printf 'NEEDS_INITRAMFS=%s\n' "$NEEDS_INITRAMFS" + printf 'NEEDS_GRUB=%s\n' "$NEEDS_GRUB" + } > /var/lib/proxmenux/cluster-apply-pending + chmod 600 /var/lib/proxmenux/cluster-apply-pending + + # Install the systemd unit. 
Idempotent: overwrite if it + # already exists (so script changes get picked up). + cat > /etc/systemd/system/proxmenux-apply-cluster-postboot.service </dev/null 2>&1 || true + systemctl enable proxmenux-apply-cluster-postboot.service >/dev/null 2>&1 || true + + # `systemctl enable` only adds the unit to multi-user.target.wants/. + # It does NOT pull the unit into the currently-running boot + # transaction — by the time we run, multi-user.target may have + # already collected its wants. `start --no-block` schedules the + # unit for activation respecting its After= ordering (pve-cluster + # comes up first), without blocking apply_pending_restore.sh + # itself. Without this, the postboot unit only fires on the + # NEXT reboot, defeating the "single reboot, zero manual steps" + # promise. + systemctl start --no-block proxmenux-apply-cluster-postboot.service >/dev/null 2>&1 || true + + echo "Cluster apply will run automatically after pve-cluster comes up." + echo "Fallback manual: bash $helper" + fi fi if [[ "$failed" -eq 0 ]]; then diff --git a/scripts/backup_restore/backup_host.sh b/scripts/backup_restore/backup_host.sh index 055fabeb..e9268b0c 100644 --- a/scripts/backup_restore/backup_host.sh +++ b/scripts/backup_restore/backup_host.sh @@ -106,9 +106,19 @@ _bk_pbs() { epoch=$(date +%s) t_start=$SECONDS + # We back up the WHOLE staging_root (rootfs/ + metadata/) into + # the .pxar — earlier versions used `$staging_root/rootfs` as + # the source, which left metadata/ (hostname, pveversion, + # selected paths, etc.) out of the archive. The compat check + # in restore then had nothing to read and degraded to + # cross-host warnings even on same-host restores. Old PBS + # snapshots created with the rootfs-only source still restore + # correctly via case 3 in _rs_check_layout (which wraps a flat + # etc/var/root/usr layout into rootfs/ and creates an empty + # metadata/), so this change is backward-compatible. 
local -a cmd=( proxmox-backup-client backup - "hostcfg.pxar:$staging_root/rootfs" + "hostcfg.pxar:$staging_root" --repository "$HB_PBS_REPOSITORY" --backup-type host --backup-id "$backup_id" @@ -121,8 +131,20 @@ _bk_pbs() { if env \ PBS_PASSWORD="$HB_PBS_SECRET" \ PBS_ENCRYPTION_PASSWORD="${HB_PBS_ENC_PASS:-}" \ + PBS_FINGERPRINT="${HB_PBS_FINGERPRINT:-}" \ "${cmd[@]}" 2>&1 | tee -a "$log_file"; then + # Main backup OK — also upload the keyfile recovery blob if + # one was configured. This runs as a SEPARATE backup group + # (`host/proxmenux-keyrecovery-`) with NO --keyfile, + # so PBS stores it as a plain (non-PBS-encrypted) blob that + # can be retrieved during fresh-install recovery. The blob + # is still passphrase-protected by openssl. + if [[ -f "$HB_STATE_DIR/pbs-key.recovery.enc" ]]; then + hb_pbs_upload_recovery_blob "$epoch" \ + || msg_warn "$(translate "Recovery blob upload failed — main backup is OK, but keyfile recovery from PBS will not be available for this snapshot.")" + fi + elapsed=$((SECONDS - t_start)) local snap_time snap_time=$(date -d "@$epoch" '+%Y-%m-%dT%H:%M:%S' 2>/dev/null || date -r "$epoch" '+%Y-%m-%dT%H:%M:%S' 2>/dev/null || echo "$epoch") @@ -135,7 +157,11 @@ _bk_pbs() { echo -e "${TAB}${BGN}$(translate "Data size:")${CL} ${BL}${staged_size}${CL}" echo -e "${TAB}${BGN}$(translate "Duration:")${CL} ${BL}$(hb_human_elapsed "$elapsed")${CL}" echo -e "${TAB}${BGN}$(translate "Encryption:")${CL} ${BL}${_pbs_enc_label}${CL}" - echo -e "${TAB}${BGN}$(translate "Log:")${CL} ${BL}${log_file}${CL}" + # Only point at the log if it actually has output. On a clean + # success the underlying tool is silent and surfacing an empty + # file path just confuses the operator into thinking they need + # to look at it. 
+ [[ -s "$log_file" ]] && echo -e "${TAB}${BGN}$(translate "Log:")${CL} ${BL}${log_file}${CL}" echo -e "" msg_ok "$(translate "Backup completed successfully.")" else @@ -219,7 +245,7 @@ _bk_borg() { echo -e "${TAB}${BGN}$(translate "Compressed size:")${CL} ${BL}${borg_compressed}${CL}" echo -e "${TAB}${BGN}$(translate "Duration:")${CL} ${BL}$(hb_human_elapsed "$elapsed")${CL}" echo -e "${TAB}${BGN}$(translate "Encryption:")${CL} ${BL}${_borg_enc_label}${CL}" - echo -e "${TAB}${BGN}$(translate "Log:")${CL} ${BL}${log_file}${CL}" + [[ -s "$log_file" ]] && echo -e "${TAB}${BGN}$(translate "Log:")${CL} ${BL}${log_file}${CL}" echo -e "" msg_ok "$(translate "Backup completed successfully.")" else @@ -306,6 +332,11 @@ _bk_local() { elapsed=$((SECONDS - t_start)) if [[ $tar_ok -eq 1 && -f "$archive" ]]; then + # Drop a sidecar JSON next to the archive so the Monitor + # (and any future tooling) can identify this as a + # ProxMenux host backup regardless of any future rename. + hb_write_archive_sidecar "$archive" "manual" "" "$profile_mode" || true + archive_size=$(hb_file_size "$archive") echo -e "" echo -e "${TAB}${BOLD}$(translate "Backup completed:")${CL}" @@ -314,7 +345,7 @@ _bk_local() { echo -e "${TAB}${BGN}$(translate "Data size:")${CL} ${BL}${staged_size}${CL}" echo -e "${TAB}${BGN}$(translate "Archive size:")${CL} ${BL}${archive_size}${CL}" echo -e "${TAB}${BGN}$(translate "Duration:")${CL} ${BL}$(hb_human_elapsed "$elapsed")${CL}" - echo -e "${TAB}${BGN}$(translate "Log:")${CL} ${BL}${log_file}${CL}" + [[ -s "$log_file" ]] && echo -e "${TAB}${BGN}$(translate "Log:")${CL} ${BL}${log_file}${CL}" echo -e "" msg_ok "$(translate "Backup completed successfully.")" else @@ -397,18 +428,60 @@ _rs_extract_pbs() { hb_require_cmd proxmox-backup-client proxmox-backup-client || return 1 hb_select_pbs_repository || return 1 - msg_info "$(translate "Listing snapshots from PBS...")" + # If we're restoring on a fresh host (or one where the keyfile + # was wiped) the encrypted 
snapshots are unreadable until we + # restore the keyfile. Look for a recovery blob in PBS and let + # the operator decrypt it with their passphrase. We try this + # silently up-front so subsequent steps (snapshot list, files, + # restore) Just Work whether or not the snapshots happen to be + # encrypted. Failure here is non-fatal: a missing recovery + # blob plus an unencrypted snapshot is a perfectly valid case + # and the rest of the flow handles it. + if [[ ! -f "$HB_STATE_DIR/pbs-key.conf" ]]; then + hb_pbs_try_keyfile_recovery "$HB_STATE_DIR/pbs-key.conf" || true + fi + + # Current proxmox-backup-client prints both `snapshot list` and + # `snapshot files` as a Unicode box-drawing table even when piped + # — the old awk-by-whitespace parser captures the `│` column + # separators instead of the data and ends up with an empty array. + # We now request --output-format json and parse with jq, then + # convert the epoch returned by `snapshot list` to the UTC ISO + # form (`YYYY-MM-DDTHH:MM:SSZ`) that `snapshot files` and + # `restore` actually accept as the snapshot path. + # + # Use dialog --infobox (not msg_info/msg_ok) so the "Listing…" + # placeholder lives inside the dialog system and disappears the + # moment the next dialog draws — no terminal text leaks between + # menus. 
+ dialog --backtitle "ProxMenux" \ + --title "$(translate "Listing snapshots from PBS")" \ + --infobox "\n$(translate "Querying repository:") $HB_PBS_REPOSITORY" 7 78 mapfile -t snapshots < <( PBS_PASSWORD="$HB_PBS_SECRET" \ + PBS_FINGERPRINT="${HB_PBS_FINGERPRINT:-}" \ proxmox-backup-client snapshot list \ - --repository "$HB_PBS_REPOSITORY" 2>/dev/null \ - | awk '$2 ~ /^host\// {print $2}' \ + --repository "$HB_PBS_REPOSITORY" \ + --output-format json 2>/dev/null \ + | jq -r '.[] | select(."backup-type" == "host" and ((."backup-id" | startswith("proxmenux-keyrecovery-")) | not)) | "\(."backup-type")|\(."backup-id")|\(."backup-time")"' 2>/dev/null \ + | while IFS='|' read -r _type _id _epoch; do + local _iso + _iso=$(date -u -d "@${_epoch}" '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null \ + || date -u -r "${_epoch}" '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null \ + || echo "${_epoch}") + echo "${_type}/${_id}/${_iso}" + done \ | sort -r | awk '!seen[$0]++' ) - msg_ok "$(translate "Snapshot list retrieved.")" if [[ ${#snapshots[@]} -eq 0 ]]; then - msg_error "$(translate "No host snapshots found in this PBS repository.")" + # Surface error as a blocking dialog so the operator can read + # it. msg_error alone gets erased the moment we `return 1` + # because the restore_menu loop redraws the source picker + # immediately afterward. + dialog --backtitle "ProxMenux" --title "$(translate "No snapshots")" \ + --msgbox "$(translate "No host snapshots were found in this PBS repository:")"$'\n\n'"$HB_PBS_REPOSITORY" \ + 10 78 return 1 fi @@ -421,14 +494,23 @@ _rs_extract_pbs() { "$HB_UI_MENU_H" "$HB_UI_MENU_W" "$HB_UI_MENU_LIST" "${menu[@]}" 3>&1 1>&2 2>&3) || return 1 snapshot="${snapshots[$((sel-1))]}" + # `snapshot files` filenames carry a `.didx` (chunk index) or + # `.blob` suffix that doesn't match the bare `.pxar` name that + # `restore` expects. Strip it before filtering. 
mapfile -t archives < <( PBS_PASSWORD="$HB_PBS_SECRET" \ + PBS_FINGERPRINT="${HB_PBS_FINGERPRINT:-}" \ proxmox-backup-client snapshot files "$snapshot" \ - --repository "$HB_PBS_REPOSITORY" 2>/dev/null \ - | awk '{print $1}' | grep '\.pxar$' || true + --repository "$HB_PBS_REPOSITORY" \ + --output-format json 2>/dev/null \ + | jq -r '.[].filename' 2>/dev/null \ + | sed -e 's/\.didx$//' -e 's/\.blob$//' \ + | grep '\.pxar$' || true ) if [[ ${#archives[@]} -eq 0 ]]; then - msg_error "$(translate "No .pxar archives found in selected snapshot.")" + dialog --backtitle "ProxMenux" --title "$(translate "No archives")" \ + --msgbox "$(translate "No .pxar archives were found in this snapshot:")"$'\n\n'"$snapshot" \ + 10 78 return 1 fi @@ -462,23 +544,51 @@ _rs_extract_pbs() { enc_pass="$(<"$HB_STATE_DIR/pbs-encryption-pass.txt")" : > "$log_file" + # PIPESTATUS check: `... | tee` masks the binary's exit code + # with tee's (always 0). Without this, a failed decrypt or + # missing keyfile would silently "succeed" — the staging + # would be empty/garbage and _rs_check_layout would then say + # "Incompatible archive", which is misleading. We capture the + # client's actual exit code separately. + local pbs_rc # shellcheck disable=SC2086 - if env \ + env \ PBS_PASSWORD="$HB_PBS_SECRET" \ PBS_ENCRYPTION_PASSWORD="${enc_pass}" \ + PBS_FINGERPRINT="${HB_PBS_FINGERPRINT:-}" \ proxmox-backup-client restore \ "$snapshot" "$archive" "$staging_root" \ --repository "$HB_PBS_REPOSITORY" \ --allow-existing-dirs true \ $key_opt \ - 2>&1 | tee -a "$log_file"; then + 2>&1 | tee -a "$log_file" + pbs_rc=${PIPESTATUS[0]} + + if [[ $pbs_rc -eq 0 ]]; then msg_ok "$(translate "Extraction completed.")" return 0 - else - msg_error "$(translate "PBS extraction failed.")" - hb_show_log "$log_file" "$(translate "PBS restore error log")" - return 1 fi + + # Decide whether this is the "encrypted snapshot without + # keyfile" pattern. 
proxmox-backup-client emits messages like + # `unable to load encryption key` / `no key found` / `Failed + # to decrypt` when that's the cause. If so, surface a helpful + # error rather than the raw log. + local extra_hint="" + if grep -qiE 'encryption key|unable to (load|read) key|no key (file|found)|decrypt|failed to decrypt' "$log_file" 2>/dev/null; then + extra_hint=$'\n\n'"$(translate "This snapshot is encrypted but no keyfile is available on this host.")" + if [[ -f "$HB_STATE_DIR/pbs-key.conf" ]]; then + extra_hint+=$'\n\n'"$(translate "A keyfile is present but doesn't match the one used to create the snapshot. Make sure you have the correct keyfile from the source host.")" + else + extra_hint+=$'\n\n'"$(translate "No keyfile recovery copy was found in PBS for this snapshot — it was created before the recovery feature existed. The encrypted content cannot be recovered.")" + fi + fi + + dialog --backtitle "ProxMenux" --title "$(translate "PBS extraction failed")" \ + --msgbox "$(translate "Could not extract from PBS.")"$'\n\n'"$(translate "Snapshot:") $snapshot"$'\n'"$(translate "Archive:") $archive$extra_hint" \ + 16 78 + hb_show_log "$log_file" "$(translate "PBS restore error log")" + return 1 } _rs_extract_borg() { @@ -544,41 +654,81 @@ _rs_extract_borg() { _rs_extract_local() { local staging_root="$1" - local log_file - log_file="/tmp/proxmenux-local-restore-$(date +%Y%m%d_%H%M%S).log" - local source_dir archive + local log_file source_dir archive hb_require_cmd tar tar || return 1 source_dir=$(hb_prompt_restore_source_dir) || return 1 - archive=$(hb_prompt_local_archive "$source_dir" \ - "$(translate "Select backup archive to restore")") || return 1 - show_proxmenux_logo - msg_title "$(translate "Restore from local archive → staging")" - echo -e "" - echo -e "${TAB}${BGN}$(translate "Archive:")${CL} ${BL}${archive}${CL}" - echo -e "${TAB}${BGN}$(translate "Archive size:")${CL} ${BL}$(hb_file_size "$archive")${CL}" - echo -e "${TAB}${BGN}$(translate 
"Staging directory:")${CL} ${BL}${staging_root}${CL}" - echo -e "" - msg_info "$(translate "Extracting archive...")" - stop_spinner + # Loop the picker on every recoverable failure so a corrupt + # archive doesn't dump the operator back to the top-level + # restore menu (which they then read as "the script never + # offered me a restore mode"). They stay in the same dir, + # pick another archive, or explicitly cancel out. + while true; do + archive=$(hb_prompt_local_archive "$source_dir" \ + "$(translate "Select backup archive to restore")") || return 1 - : > "$log_file" - if [[ "$archive" == *.zst ]]; then - tar --zstd -xf "$archive" -C "$staging_root" >>"$log_file" 2>&1 - else - tar -xf "$archive" -C "$staging_root" >>"$log_file" 2>&1 - fi - local rc=$? + log_file="/tmp/proxmenux-local-restore-$(date +%Y%m%d_%H%M%S).log" + + show_proxmenux_logo + msg_title "$(translate "Restore from local archive → staging")" + echo -e "" + echo -e "${TAB}${BGN}$(translate "Archive:")${CL} ${BL}${archive}${CL}" + echo -e "${TAB}${BGN}$(translate "Archive size:")${CL} ${BL}$(hb_file_size "$archive")${CL}" + echo -e "${TAB}${BGN}$(translate "Staging directory:")${CL} ${BL}${staging_root}${CL}" + echo -e "" + msg_info "$(translate "Extracting archive...")" + stop_spinner + + : > "$log_file" + # Wipe staging from a previous failed attempt so we don't + # mix partial extractions across retries. + find "$staging_root" -mindepth 1 -maxdepth 1 -exec rm -rf {} + 2>/dev/null + + if [[ "$archive" == *.zst ]]; then + tar --zstd -xf "$archive" -C "$staging_root" >>"$log_file" 2>&1 + else + tar -xf "$archive" -C "$staging_root" >>"$log_file" 2>&1 + fi + local rc=$? 
+ + if [[ $rc -eq 0 ]]; then + msg_ok "$(translate "Extraction completed.")" + return 0 + fi - if [[ $rc -eq 0 ]]; then - msg_ok "$(translate "Extraction completed.")" - return 0 - else msg_error "$(translate "Extraction failed.")" hb_show_log "$log_file" "$(translate "Local restore error log")" - return 1 - fi + + # Recoverable: most often a corrupted archive (interrupted + # mid-write, bad disk sector, partial copy). Give the user + # a clear next step instead of silently bouncing back. + local recover_msg recover_choice + recover_msg="$(translate "The archive could not be extracted.")"$'\n\n' + recover_msg+="$(translate "Most common cause: the archive is corrupted (interrupted write, partial copy, or storage issue).")"$'\n\n' + recover_msg+="$(translate "Archive:") $archive" + recover_choice=$(dialog --backtitle "ProxMenux" \ + --title "$(translate "Restore failed")" \ + --menu "$recover_msg" 16 80 4 \ + 1 "$(translate "Try another archive")" \ + 2 "$(translate "Delete this corrupt archive and pick another")" \ + 0 "$(translate "Cancel restore")" \ + 3>&1 1>&2 2>&3) || return 1 + + case "$recover_choice" in + 1) continue ;; # back to the picker + 2) + if whiptail --title "$(translate "Delete archive")" \ + --yesno "$(translate "Permanently delete this archive and its sidecar?")"$'\n\n'"$archive" \ + 11 78; then + rm -f "$archive" "${archive}.proxmenux.json" + msg_ok "$(translate "Archive deleted.")" + fi + continue + ;; + 0|*) return 1 ;; + esac + done } # Ensure staging has rootfs/ layout (Borg may nest) @@ -714,10 +864,12 @@ _rs_preview_diff() { _rs_export_to_file() { local staging_root="$1" - local dest_dir archive archive_size t_start elapsed + local dest_dir archive archive_size t_start elapsed log_file + local stage_bytes pipefail_state tar_ok dest_dir=$(hb_prompt_dest_dir) || return 1 archive="$dest_dir/hostcfg-export-$(hostname)-$(date +%Y%m%d_%H%M%S).tar.gz" + log_file="/tmp/proxmenux-export-$(date +%Y%m%d_%H%M%S).log" show_proxmenux_logo msg_title 
"$(translate "Export backup data to file")" @@ -727,11 +879,40 @@ _rs_export_to_file() { echo -e "" echo -e "${TAB}$(translate "No changes will be made to the running system.")" echo -e "" - msg_info "$(translate "Creating export archive...")" stop_spinner t_start=$SECONDS - if tar -czf "$archive" -C "$staging_root" . 2>/dev/null; then + tar_ok=0 + : > "$log_file" + + if command -v pv >/dev/null 2>&1; then + # Stream tar through pv so the operator sees a live progress + # bar instead of staring at a frozen title for minutes. We + # mirror the same pattern used by the local backup path + # (_bk_local) so the experience is consistent across + # create-archive and export-archive flows. + stage_bytes=$(du -sb "$staging_root" 2>/dev/null | awk '{print $1}') + pipefail_state=$(set -o | awk '$1=="pipefail" {print $2}') + set -o pipefail + echo -e "${TAB}$(translate "Compressing") $(numfmt --to=iec-i --suffix=B "$stage_bytes" 2>/dev/null || printf '%s bytes' "$stage_bytes") → $archive" + echo + if tar -cf - -C "$staging_root" . 2>>"$log_file" \ + | pv -s "$stage_bytes" | gzip > "$archive" 2>>"$log_file"; then + tar_ok=1 + fi + [[ "$pipefail_state" == "off" ]] && set +o pipefail + else + # pv isn't installed — at least tell the operator something + # is happening and hint at the package they can install for + # a better experience next time. + msg_info "$(translate "Creating export archive (install 'pv' for a live progress bar)...")" + stop_spinner + if tar -czf "$archive" -C "$staging_root" . >>"$log_file" 2>&1; then + tar_ok=1 + fi + fi + + if [[ $tar_ok -eq 1 && -f "$archive" ]]; then elapsed=$((SECONDS - t_start)) archive_size=$(hb_file_size "$archive") echo -e "" @@ -741,8 +922,16 @@ _rs_export_to_file() { echo -e "${TAB}${BGN}$(translate "Duration:")${CL} ${BL}$(hb_human_elapsed "$elapsed")${CL}" echo -e "" msg_ok "$(translate "Export completed. 
The running system has not been modified.")" + echo -e "" + msg_success "$(translate "Press Enter to return to menu...")" + read -r + return 0 else msg_error "$(translate "Export failed.")" + hb_show_log "$log_file" "$(translate "Export error log")" + echo -e "" + msg_success "$(translate "Press Enter to return to menu...")" + read -r return 1 fi } @@ -842,7 +1031,10 @@ _rs_apply() { fi local backup_root - backup_root="/root/proxmenux-pre-restore/$(date +%Y%m%d_%H%M%S)" + # Pre-restore safety snapshot lives outside /root for the same + # reason as the cluster recovery dir — restoring /root with + # `rsync --delete` would otherwise wipe it mid-flow. + backup_root="/var/lib/proxmenux/pre-restore/$(date +%Y%m%d_%H%M%S)" mkdir -p "$backup_root" local applied=0 skipped=0 t_start elapsed @@ -857,9 +1049,16 @@ _rs_apply() { # Never restore cluster virtual filesystem data live. # Extract it for manual recovery in maintenance mode. + # Path note: this used to live under /root/proxmenux-recovery/, + # but a later iteration of the same loop applies /root from + # the backup with `rsync --delete`, which wipes anything + # under /root that isn't in the backup — including our + # freshly-extracted recovery dir. We now stage it under + # /var/lib/proxmenux/recovery/, which sits next to + # restore-pending/ and isn't touched by any path apply. if [[ "$rel" == etc/pve* ]] || [[ "$rel" == var/lib/pve-cluster* ]]; then if [[ -z "$cluster_recovery_root" ]]; then - cluster_recovery_root="/root/proxmenux-recovery/$(date +%Y%m%d_%H%M%S)" + cluster_recovery_root="/var/lib/proxmenux/recovery/$(date +%Y%m%d_%H%M%S)" mkdir -p "$cluster_recovery_root" fi mkdir -p "$cluster_recovery_root/$(dirname "$rel")" @@ -1012,11 +1211,33 @@ _rs_prompt_zfs_opt_in() { return 0 fi - local zfs_confirm_msg - zfs_confirm_msg="$(translate "This backup includes /etc/zfs. 
Include it in restore?")"$'\n\n'"$(translate "Only enable this if the target host and ZFS pool names match exactly.")" + # /etc/zfs/ on a Proxmox host ALWAYS contains package defaults + # (zfs-functions, zpool.d/, zed.d/) — they're shipped by the + # zfsutils-linux package and identical across PVE installs. + # Only zpool.cache (and the keys/ subdir) carry host-specific + # state, because zpool.cache references the source host's + # physical disks by GUID. Anything else is safe to restore. + local cache="$staging_root/rootfs/etc/zfs/zpool.cache" + if [[ ! -f "$cache" ]]; then + # No host-specific bits — restore defaults silently. + export HB_RESTORE_INCLUDE_ZFS=1 + return 0 + fi + + # zpool.cache IS present. Two cases: + # - Same host restore (recovery on the source machine) → quietly + # include; the cache is correct for this host by definition. + # - Cross-host restore → loud warning: pool GUIDs in the cache + # won't match the target's disks, and Proxmox would try to + # import non-existent pools at next boot. + local msg + if [[ "${HB_COMPAT_SAME_HOST:-0}" == "1" ]]; then + msg="$(translate "Backup includes /etc/zfs/zpool.cache. Restore it (same host detected)?")" + else + msg="$(translate "This backup includes /etc/zfs/zpool.cache (host-specific ZFS state).")"$'\n\n'"$(translate "Restore it ONLY if the target host has the same pools and disks as the source. Otherwise Proxmox may try to import non-existent pools at next boot.")" + fi if whiptail --title "$(translate "ZFS configuration")" \ - --yesno "$zfs_confirm_msg" \ - 11 76; then + --yesno "$msg" 12 78; then export HB_RESTORE_INCLUDE_ZFS=1 fi } @@ -1056,21 +1277,39 @@ _rs_install_pending_service_unit() { local onboot_script="$1" local unit_file="/etc/systemd/system/proxmenux-restore-onboot.service" + # `network-pre.target` is a passive target activated by + # systemd-networkd. 
On Proxmox the networking stack is + # `networking.service` from ifupdown2, NOT systemd-networkd, + # so network-pre.target is never reached — the original unit + # had `ConditionResult=no` at boot and the pending restore + # silently sat in `pending` state forever. + # + # The correct anchor on PVE is `networking.service`: we run + # before it (so we can rewrite /etc/network in time for + # ifupdown2 to read the new config) and we pull ourselves in + # via `multi-user.target` which IS always activated at boot. cat > "$unit_file" </dev/null 2>&1 \ + && command -v apt-get >/dev/null 2>&1; then + local cur_pkgs_file + cur_pkgs_file=$(mktemp) + apt-mark showmanual 2>/dev/null | sort -u > "$cur_pkgs_file" + local -a missing=() + mapfile -t missing < <(comm -23 <(sort -u "$pkglist") "$cur_pkgs_file") + rm -f "$cur_pkgs_file" + if [[ ${#missing[@]} -gt 0 ]]; then + echo + msg_info "$(translate "Installing") ${#missing[@]} $(translate "user-installed packages from backup...")" + stop_spinner + apt-get update -qq 2>&1 | sed -e 's/^/ /' | tail -2 + + # Pre-filter to packages apt actually knows about — + # otherwise a single typo or repo-renamed pkg in + # packages.manual.list (e.g. `lifnet-subnet-perl` from + # a hand-typo'd apt-mark) makes `apt-get install` exit + # with E_UNRESOLVABLE and the entire batch is skipped. + # We do this in two passes: first split into installable + # vs unknown, then run install on the installable set. + local -a installable=() unknown=() + local pkg + for pkg in "${missing[@]}"; do + if apt-cache show "$pkg" >/dev/null 2>&1; then + installable+=("$pkg") + else + unknown+=("$pkg") + fi + done + + if (( ${#installable[@]} > 0 )); then + # DEBIAN_FRONTEND=noninteractive + --force-conf prevents + # apt from blocking on `*** log2ram.conf (Y/I/N/O/D/Z) ?` + # type prompts (which would leave the package in + # half-installed `iU` state and ultimately produce the + # same boot-hang problem we're trying to FIX with this + # restore). 
Confnew/confold both work; we pick confold + # so the keepers from the BACKUP's restored configs win, + # matching what the operator implicitly asked for. + DEBIAN_FRONTEND=noninteractive \ + apt-get install -y \ + -o Dpkg::Options::="--force-confdef" \ + -o Dpkg::Options::="--force-confold" \ + "${installable[@]}" 2>&1 | sed -e 's/^/ /' | tail -10 + # PIPESTATUS[0] is the real exit code from apt-get; + # without this, the pipeline always reports tee's + # 0 and we'd lie about success. + local apt_rc=${PIPESTATUS[0]} + if (( apt_rc == 0 )); then + msg_ok "$(translate "Installed:") ${#installable[@]} $(translate "packages")" + else + msg_warn "$(translate "apt-get exited") ${apt_rc} — $(translate "some packages may have failed; see output above")" + fi + fi + if (( ${#unknown[@]} > 0 )); then + msg_warn "$(translate "Skipped, not in apt cache:") ${unknown[*]:0:6}$([[ ${#unknown[@]} -gt 6 ]] && echo " … (+ $((${#unknown[@]} - 6)) more)")" + echo -e "${TAB}${BL}$(translate "These were marked manual on the source host but apt-cache cannot resolve them now (typo, removed pkg, third-party repo not configured yet).")${CL}" + fi + fi + fi + + # ─ Guest configs — only in full strategies ──────────────── + if [[ "$include_guests" == "1" ]]; then + local nodes_root="$staging_root/rootfs/etc/pve/nodes" + if [[ -d "$nodes_root" ]]; then + local src_node_dir + src_node_dir=$(find "$nodes_root" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | head -1) + if [[ -n "$src_node_dir" ]]; then + local -a lxc_confs=() qm_confs=() + [[ -d "$src_node_dir/lxc" ]] && mapfile -t lxc_confs < <(find "$src_node_dir/lxc" -maxdepth 1 -type f -name '*.conf' 2>/dev/null | sort) + [[ -d "$src_node_dir/qemu-server" ]] && mapfile -t qm_confs < <(find "$src_node_dir/qemu-server" -maxdepth 1 -type f -name '*.conf' 2>/dev/null | sort) + if [[ ${#lxc_confs[@]} -gt 0 || ${#qm_confs[@]} -gt 0 ]]; then + local cur_node target_lxc target_qm + cur_node=$(hostname) + target_lxc="/etc/pve/nodes/$cur_node/lxc" + 
target_qm="/etc/pve/nodes/$cur_node/qemu-server" + mkdir -p "$target_lxc" "$target_qm" 2>/dev/null + echo + msg_info "$(translate "Restoring guest configs (LXC + QEMU)...")" + stop_spinner + local copied=0 skipped=0 f vmid + for f in "${lxc_confs[@]}"; do + vmid=$(basename "$f" .conf) + if [[ -e "$target_lxc/$vmid.conf" ]]; then + ((skipped++)) + elif cp "$f" "$target_lxc/$vmid.conf" 2>/dev/null; then + ((copied++)) + fi + done + for f in "${qm_confs[@]}"; do + vmid=$(basename "$f" .conf) + if [[ -e "$target_qm/$vmid.conf" ]]; then + ((skipped++)) + elif cp "$f" "$target_qm/$vmid.conf" 2>/dev/null; then + ((copied++)) + fi + done + msg_ok "$(translate "Guest configs restored:") LXC+QEMU=$copied, $(translate "skipped (already exist)"):$skipped" + if (( copied > 0 )); then + echo -e "${TAB}${BL}$(translate "Use 'pct restore' / 'qmrestore' to recover their disks from your VM backups.")${CL}" + fi + fi + fi + fi + fi +} + + _rs_apply_menu() { local staging_root="$1" @@ -1608,8 +1987,11 @@ _rs_apply_menu() { _rs_run_custom_restore "$staging_root" && return 0 ;; 3) + # _rs_export_to_file owns its own end-of-flow + # (showing result + "Press Enter to return to menu") + # so we don't call _rs_finish_flow here — doing so + # would queue a second identical prompt. if _rs_export_to_file "$staging_root"; then - _rs_finish_flow return 0 fi ;; @@ -1652,9 +2034,18 @@ restore_menu() { esac if [[ $ok -eq 1 ]] && _rs_check_layout "$staging_root"; then - if _rs_apply_menu "$staging_root"; then - rm -rf "$staging_root" - return 0 + # Run the compatibility check BEFORE the apply menu so + # the operator sees PVE-version / hostname / network / + # storage drift up front. This also sets + # HB_COMPAT_SAME_HOST, which downstream prompts + # (_rs_prompt_zfs_opt_in) read to choose between the + # silent same-host path and the loud cross-host path. 
+ hb_compat_check "$staging_root" + if hb_show_compat_report; then + if _rs_apply_menu "$staging_root"; then + rm -rf "$staging_root" + return 0 + fi fi fi diff --git a/scripts/backup_restore/backup_scheduler.sh b/scripts/backup_restore/backup_scheduler.sh index 26c87d35..333bacf3 100644 --- a/scripts/backup_restore/backup_scheduler.sh +++ b/scripts/backup_restore/backup_scheduler.sh @@ -278,10 +278,19 @@ _pick_job() { return 1 fi + # Build the menu rows. The loop variable is INTENTIONALLY named + # `_iter_id` (not `id`) — every caller passes "id" as $__out_var so + # the nameref below should point at the caller's local. A loop + # variable named `id` here would shadow it, and the nameref would + # silently write into _pick_job's own scope instead, leaving the + # caller with an empty string. That manifested as: + # ✓ Job timer enabled: (empty) + # run_scheduled_backup.sh: Usage: ... + # Both reported on 2026-06-07. local -a menu=() - local i=1 id - for id in "${ids[@]}"; do - menu+=("$i" "$id [$(_show_job_status "$id")]") + local i=1 _iter_id + for _iter_id in "${ids[@]}"; do + menu+=("$i" "$_iter_id [$(_show_job_status "$_iter_id")]") ((i++)) done local sel @@ -295,12 +304,98 @@ _pick_job() { return 0 } +# Common screen reset for any post-dialog action result. The +# `dialog` calls in this script leave their box drawn on screen +# even after the user has confirmed; without this reset, the +# subsequent msg_ok / msg_warn / "Press Enter" output renders +# in the bottom-left corner UNDER the leftover dialog box. +# show_proxmenux_logo already runs `clear` internally, so we +# don't add another one — the convention used across proxmenux +# (create_vm_menu.sh, config_menu.sh, menu_post_install.sh) is: +# show_proxmenux_logo → msg_title → result message +# Reported 2026-06-07 when the operator hit "Run job now" and +# saw "Job executed successfully" floating over the picker. 
+_render_action_screen() { + show_proxmenux_logo + msg_title "$1" +} + _job_run_now() { local id="" _pick_job "$(translate "Run job now")" id || return 1 + # Defensive guard against a future regression of the nameref-shadowing + # bug that left $id empty here on 2026-06-07. Without this, the runner + # gets called with no argument and emits "Usage: ... ". + if [[ -z "$id" ]]; then + _render_action_screen "$(translate "Run job now")" + msg_error "$(translate "Job selection returned empty id — aborting.")" + msg_success "$(translate "Press Enter to continue...")" + read -r + return 1 + fi + local runner="$LOCAL_SCRIPTS/backup_restore/run_scheduled_backup.sh" [[ ! -f "$runner" ]] && runner="$SCRIPT_DIR/run_scheduled_backup.sh" - if "$runner" "$id"; then + + # ── Visible execution ─────────────────────────────────── + # Clear the leftover dialog frame and announce what's about + # to happen, so the operator stops looking at a frozen + # picker. We then tail the runner's log file in the + # background so progress (or errors) are visible as they + # happen, instead of the user staring at a black screen. + # No msg_info banner between the title and the streaming + # log — the title already says we're running, the streamed + # `=== Scheduled backup job X started ===` is the better + # progress cue. + _render_action_screen "$(translate "Running backup job:") $id" + echo + + # Snapshot existing log files so we can identify the new one the + # runner is about to create (filename pattern is `${id}-${ts}.log`). + local existing_logs new_log="" + existing_logs="$(ls -1 "${LOG_DIR}/${id}-"*.log 2>/dev/null || true)" + + # Launch the runner in the background so we can tail its log + # while it's still writing. + "$runner" "$id" & + local runner_pid=$! + + # Wait up to ~10s for the new log file to appear, then start tail. + # On a small config-only backup the job may finish before we even + # find the log; that's fine, we just skip tailing. 
+ local tail_pid="" + local _i + for _i in $(seq 1 20); do + local f + for f in "${LOG_DIR}/${id}-"*.log; do + [[ -f "$f" ]] || continue + if ! grep -qFx "$f" <<<"$existing_logs" 2>/dev/null; then + new_log="$f" + break 2 + fi + done + # Stop probing if the runner already exited. + kill -0 "$runner_pid" 2>/dev/null || break + sleep 0.5 + done + + if [[ -n "$new_log" ]]; then + tail -f "$new_log" & + tail_pid=$! + fi + + wait "$runner_pid" + local runner_exit=$? + + if [[ -n "$tail_pid" ]]; then + # Give tail a beat to flush the last buffered lines, then close it. + sleep 0.5 + kill "$tail_pid" 2>/dev/null || true + wait "$tail_pid" 2>/dev/null || true + fi + + echo + if [[ "$runner_exit" == "0" ]]; then msg_ok "$(translate "Job executed successfully.")" else msg_warn "$(translate "Job execution finished with errors. Check logs.")" @@ -312,11 +407,29 @@ _job_run_now() { _job_toggle() { local id="" _pick_job "$(translate "Enable/Disable job")" id || return 1 + if [[ -z "$id" ]]; then + _render_action_screen "$(translate "Enable/Disable job")" + msg_error "$(translate "Job selection returned empty id — aborting.")" + msg_success "$(translate "Press Enter to continue...")" + read -r + return 1 + fi + + # Decide the action label up front so the title reflects what we + # actually just did (enable vs disable). 
+ local action_label if systemctl is-enabled --quiet "proxmenux-backup-${id}.timer" >/dev/null 2>&1; then systemctl disable --now "proxmenux-backup-${id}.timer" >/dev/null 2>&1 || true - msg_warn "$(translate "Job timer disabled:") $id" + action_label="disabled" else systemctl enable --now "proxmenux-backup-${id}.timer" >/dev/null 2>&1 || true + action_label="enabled" + fi + + _render_action_screen "$(translate "Enable/Disable job")" + if [[ "$action_label" == "disabled" ]]; then + msg_warn "$(translate "Job timer disabled:") $id" + else msg_ok "$(translate "Job timer enabled:") $id" fi msg_success "$(translate "Press Enter to continue...")" @@ -326,6 +439,17 @@ _job_toggle() { _job_delete() { local id="" _pick_job "$(translate "Delete job")" id || return 1 + # An empty id here would build malformed unit paths like + # /etc/systemd/system/proxmenux-backup-.timer, and the subsequent + # rm -f would silently no-op against bogus paths — making it LOOK + # like a successful delete while the real job stays untouched. + if [[ -z "$id" ]]; then + _render_action_screen "$(translate "Delete job")" + msg_error "$(translate "Job selection returned empty id — aborting.")" + msg_success "$(translate "Press Enter to continue...")" + read -r + return 1 + fi if ! 
whiptail --title "$(translate "Confirm delete")" \ --yesno "$(translate "Delete scheduled backup job?")"$'\n\n'"ID: ${id}" 10 66; then return 1 @@ -333,6 +457,8 @@ _job_delete() { systemctl disable --now "proxmenux-backup-${id}.timer" >/dev/null 2>&1 || true rm -f "$(_service_file "$id")" "$(_timer_file "$id")" "$(_job_file "$id")" "$(_job_paths_file "$id")" systemctl daemon-reload >/dev/null 2>&1 || true + + _render_action_screen "$(translate "Delete job")" msg_ok "$(translate "Job deleted:") $id" msg_success "$(translate "Press Enter to continue...")" read -r diff --git a/scripts/backup_restore/collectors/build_manifest.sh b/scripts/backup_restore/collectors/build_manifest.sh new file mode 100644 index 00000000..1c60dcd0 --- /dev/null +++ b/scripts/backup_restore/collectors/build_manifest.sh @@ -0,0 +1,126 @@ +#!/usr/bin/env bash +# ========================================================== +# ProxMenux backup manifest orchestrator +# ========================================================== +# Composes the six collectors into one manifest.json that +# validates against schema/manifest.schema.json. Designed to +# be called by backup_host.sh during a backup run. Read-only +# (no side effects on the host). +# +# Usage: +# build_manifest.sh [--paths-archived ...] +# build_manifest.sh --validate (re-runs the JSON Schema validation) +# +# Stdout: pretty-printed manifest JSON. +# Stderr: progress + warnings. 
+# ========================================================== +set -euo pipefail + +COLLECTORS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SCHEMA_FILE="$COLLECTORS_DIR/../schema/manifest.schema.json" + +# Parse flags +paths_archived='null' +do_validate=0 +while [[ $# -gt 0 ]]; do + case "$1" in + --paths-archived) + shift + tmp='[]' + while [[ $# -gt 0 && "$1" != --* ]]; do + tmp="$(jq --argjson a "$tmp" --arg p "$1" -n '$a + [$p]')" + shift + done + paths_archived="$tmp" + ;; + --validate) + do_validate=1; shift ;; + -h|--help) + sed -nE '/^# Usage:/,/^# Stderr:/p' "$0" | sed -E 's/^# ?//' >&2 + exit 0 + ;; + *) shift ;; + esac +done + +# Run each collector. If a collector fails we fall back to a safe default +# (empty array / null object) and warn — the manifest is still useful even +# if one section is incomplete. +run_collector() { + local name="$1" fallback="$2" + local out + if out="$(bash "$COLLECTORS_DIR/$name" 2>>/tmp/proxmenux-manifest-stderr.log)"; then + printf '%s' "$out" + else + printf 'warning: collector %s failed; using fallback\n' "$name" >&2 + printf '%s' "$fallback" + fi +} + +# Empty error log first so we can attribute failures to this run. +: >/tmp/proxmenux-manifest-stderr.log + +source_host="$(run_collector collect_source_host.sh '{}')" +hardware_inventory="$(run_collector collect_hardware.sh '{"gpu":[],"tpu":[],"nic":[],"wireless":[]}')" +storage_inventory="$(run_collector collect_storage.sh '{"zfs_pools":[],"lvm":{"vgs":[]},"physical_disks":[],"pve_storage_cfg":[],"mounts":[]}')" +installed_components="$(run_collector collect_proxmenux_state.sh '[]')" +kernel_params="$(run_collector collect_kernel.sh '{"cmdline_extra":[],"modules_loaded_at_boot":[],"modprobe_d_files":[]}')" +guests="$(run_collector collect_guests.sh '{"vms":[],"lxcs":[]}')" + +created_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)" + +# Compose the final manifest. The wrapper key matches the schema: +# the top level is a single "proxmenux_backup_manifest" object. 
+manifest="$(jq -n \ + --arg created_at "$created_at" \ + --arg created_by "proxmenux-host-backup/1.3.0" \ + --argjson source_host "$source_host" \ + --argjson hardware "$hardware_inventory" \ + --argjson storage "$storage_inventory" \ + --argjson components "$installed_components" \ + --argjson kernel "$kernel_params" \ + --argjson guests "$guests" \ + --argjson paths_archived "$paths_archived" \ + '{ + proxmenux_backup_manifest: { + schema_version: 1, + created_at: $created_at, + created_by: $created_by, + source_host: $source_host, + hardware_inventory: $hardware, + storage_inventory: $storage, + proxmenux_installed_components: $components, + kernel_params: $kernel, + vms_lxcs_at_backup: $guests, + backup_metadata: { + encrypted: false, + encryption_format: null, + compression: "zstd", + paths_archived: $paths_archived, + sha256_archive: null, + size_bytes: null + } + } + }')" + +# Optional validation step. If python3 + jsonschema are available, run +# them; otherwise silently skip (validation is mostly a developer aid). 
+if [[ "$do_validate" == 1 ]]; then + if command -v python3 >/dev/null 2>&1 && python3 -c 'import jsonschema' 2>/dev/null; then + printf '%s' "$manifest" | python3 -c " +import json, sys, jsonschema +schema = json.load(open('$SCHEMA_FILE')) +inst = json.load(sys.stdin) +try: + jsonschema.validate(instance=inst, schema=schema) + print('manifest: validates against schema', file=sys.stderr) +except jsonschema.exceptions.ValidationError as e: + print(f'manifest: SCHEMA VIOLATION at {list(e.absolute_path)}: {e.message}', file=sys.stderr) + sys.exit(1) +" + else + printf 'manifest: jsonschema python module not present; skipping validation\n' >&2 + fi +fi + +printf '%s\n' "$manifest" diff --git a/scripts/backup_restore/collectors/collect_guests.sh b/scripts/backup_restore/collectors/collect_guests.sh new file mode 100644 index 00000000..fee3e74a --- /dev/null +++ b/scripts/backup_restore/collectors/collect_guests.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# ========================================================== +# ProxMenux backup manifest collector — vms_lxcs_at_backup +# ========================================================== +# Enumerates VMs (qm list) and LXCs (pct list) on this PVE node. +# Read-only; emits the metadata only — actual VM/LXC data is +# the responsibility of vzdump / PBS, not this manifest. +# Schema: scripts/backup_restore/schema/manifest.schema.json +# ========================================================== +set -euo pipefail + +vms='[]' +lxcs='[]' + +# ── VMs (qm list) ── +# Output: +# VMID NAME STATUS MEM(MB) BOOTDISK(GB) PID +# 100 Alpine-Linux-3-21 stopped 4096 0.00 0 +# Header line starts with VMID; we skip it. +if command -v qm >/dev/null 2>&1; then + while IFS= read -r line; do + [[ -z "$line" ]] && continue + # Skip the header + [[ "$line" =~ ^[[:space:]]*VMID[[:space:]] ]] && continue + # Parse positionally. 
NAME can contain spaces, but `qm list` pads/columns + # them, so we use fixed positions: VMID at col 1, STATUS as the 3rd + # whitespace-delimited token from the END (mem, bootdisk, pid are after). + vmid="$(printf '%s' "$line" | awk '{print $1}')" + [[ "$vmid" =~ ^[0-9]+$ ]] || continue + # Strip trailing PID + BOOTDISK + MEM(MB) + STATUS to extract the NAME. + # rev → cut → rev technique: + trailing="$(printf '%s' "$line" | awk '{printf "%s %s %s %s", $(NF-3), $(NF-2), $(NF-1), $NF}')" + status="$(printf '%s' "$trailing" | awk '{print $1}')" + memory_mb="$(printf '%s' "$trailing" | awk '{print $2}')" + bootdisk_gb="$(printf '%s' "$trailing" | awk '{print $3}')" + # Name: drop first column (vmid) and last 4 columns + name="$(printf '%s' "$line" | awk '{$1=""; for(i=NF-3;i<=NF;i++) $i=""; sub(/^[[:space:]]+/,""); sub(/[[:space:]]+$/,""); print}')" + case "$status" in + running|stopped|paused) ;; + *) status="stopped" ;; + esac + + vms="$(jq --argjson acc "$vms" \ + --argjson vmid "$vmid" \ + --arg name "$name" \ + --argjson memory_mb "${memory_mb:-0}" \ + --argjson bootdisk_gb "${bootdisk_gb:-0}" \ + --arg status "$status" \ + -n ' + $acc + [{ + vmid: $vmid, + name: $name, + memory_mb: $memory_mb, + bootdisk_gb: $bootdisk_gb, + status: $status, + config_file: ("configs/qemu-server/" + ($vmid|tostring) + ".conf") + }] + ')" + done < <(qm list 2>/dev/null || true) +fi + +# ── LXCs (pct list) ── +# Output: +# VMID Status Lock Name +# 101 running alpine +# Header line starts with VMID; we skip it. 
+if command -v pct >/dev/null 2>&1; then + while IFS= read -r line; do + [[ -z "$line" ]] && continue + [[ "$line" =~ ^[[:space:]]*VMID[[:space:]] ]] && continue + vmid="$(printf '%s' "$line" | awk '{print $1}')" + [[ "$vmid" =~ ^[0-9]+$ ]] || continue + status="$(printf '%s' "$line" | awk '{print $2}')" + # Lock column is sparse; name is always last positional non-empty token + name="$(printf '%s' "$line" | awk '{print $NF}')" + case "$status" in + running|stopped) ;; + *) status="stopped" ;; + esac + + lxcs="$(jq --argjson acc "$lxcs" \ + --argjson vmid "$vmid" \ + --arg name "$name" \ + --arg status "$status" \ + -n ' + $acc + [{ + vmid: $vmid, + name: $name, + status: $status, + config_file: ("configs/lxc/" + ($vmid|tostring) + ".conf") + }] + ')" + done < <(pct list 2>/dev/null || true) +fi + +jq -n --argjson vms "$vms" --argjson lxcs "$lxcs" \ + '{ vms: $vms, lxcs: $lxcs }' diff --git a/scripts/backup_restore/collectors/collect_hardware.sh b/scripts/backup_restore/collectors/collect_hardware.sh new file mode 100644 index 00000000..2db1335e --- /dev/null +++ b/scripts/backup_restore/collectors/collect_hardware.sh @@ -0,0 +1,222 @@ +#!/usr/bin/env bash +# ========================================================== +# ProxMenux backup manifest collector — hardware_inventory +# ========================================================== +# Detects GPUs (with vendor → ProxMenux installer mapping), +# TPUs (Coral PCIe/USB), NICs (with bridge membership), and +# Wireless interfaces. Read-only. Schema: +# scripts/backup_restore/schema/manifest.schema.json +# ========================================================== +set -euo pipefail + +# Vendor → installer path mapping. Update when ProxMenux adds new +# installers for hardware that depends on out-of-tree drivers. +# Vendors WITHOUT a mapping get null (e.g. Intel/AMD iGPUs work with +# in-tree drivers, no special installer needed). 
+# gpu_installer_for VENDOR
+# Prints the repo-relative installer script for VENDOR, or an empty
+# string when the vendor has no dedicated installer.
+gpu_installer_for() {
+    case "$1" in
+        NVIDIA) echo "scripts/gpu_tpu/nvidia_installer.sh" ;;
+        *) echo "" ;;
+    esac
+}
+
+# ── GPUs ──
+# lspci -nnD outputs:
+#   0000:01:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP107GL [Quadro P620] [10de:1cb6] (rev a1)
+# We pick anything classified as VGA/3D/Display (display controllers).
+#
+# Every probe pipeline below ends in `|| true`: under `set -euo pipefail`
+# a single non-matching grep or failing helper would otherwise abort the
+# whole collector instead of leaving that one field empty.
+gpu_array='[]'
+while IFS= read -r line; do
+    [[ -z "$line" ]] && continue
+    pci_address="$(printf '%s' "$line" | awk '{print $1}')"
+    pci_id="$(printf '%s' "$line" | grep -oE '\[[0-9a-f]{4}:[0-9a-f]{4}\]' | tail -1 | tr -d '[]' || true)"
+    # Description: everything between the "controller]:" header and the
+    # final "[pci_id]" tag. For AMD this includes the [AMD/ATI] tag; for
+    # NVIDIA/Intel it's just vendor + model.
+    desc="$(printf '%s' "$line" | sed -nE "s@.*\]:[[:space:]]*(.*)[[:space:]]+\[[0-9a-f]{4}:[0-9a-f]{4}\].*@\1@p")"
+
+    # Vendor classification
+    case "$desc" in
+        *NVIDIA*) vendor="NVIDIA" ;;
+        *"Advanced Micro Devices"*|*AMD*) vendor="AMD" ;;
+        *"Intel Corporation"*|*Intel*) vendor="Intel" ;;
+        *) vendor="Other" ;;
+    esac
+
+    # Model: strip every known vendor prefix from desc. Order matters —
+    # the longest specific prefix (AMD's "Inc. [AMD/ATI]") must come before
+    # the generic short one.
+    model="$(printf '%s' "$desc" | sed -E '
+        s/^Advanced Micro Devices, Inc\. \[AMD\/ATI\][[:space:]]+//
+        s/^Advanced Micro Devices(, Inc\.)?[[:space:]]+//
+        s/^NVIDIA Corporation[[:space:]]+//
+        s/^Intel Corporation[[:space:]]+//
+        s/[[:space:]]+$//
+    ')"
+    # Kernel driver in use (may be empty if module not loaded yet)
+    kernel_driver="$(lspci -nnks "$pci_address" 2>/dev/null | awk -F: '/Kernel driver in use/{sub(/^[ \t]+/,"",$2); print $2; exit}' || true)"
+    # Passthrough eligible if the GPU is bound to vfio-pci OR it's a discrete
+    # secondary GPU (not the primary console). Pragmatic heuristic: discrete
+    # GPUs are usually eligible; iGPUs (Intel HD/UHD, AMD APU iGPUs) usually not
+    # because they drive the host console.
+    passthrough_eligible=false
+    case "$kernel_driver" in
+        vfio-pci) passthrough_eligible=true ;;
+        nvidia|nouveau) passthrough_eligible=true ;; # discrete by definition
+    esac
+
+    # ProxMenux installer for this GPU vendor
+    proxmenux_installer="$(gpu_installer_for "$vendor")"
+
+    # Installed driver version from the managed_installs registry
+    installed_driver_version=""
+    if [[ "$vendor" == "NVIDIA" ]] && [[ -f /usr/local/share/proxmenux/managed_installs.json ]]; then
+        # `.items[]?` tolerates a registry without an items array, and
+        # `|| true` keeps a malformed registry (jq exits non-zero, which
+        # pipefail propagates through `| head -1`) from killing the script;
+        # the version simply stays empty and is emitted as null.
+        installed_driver_version="$(jq -r '
+            .items[]?
+            | select(.removed_at == null and .type == "nvidia_xfree86")
+            | .current_version // ""
+        ' /usr/local/share/proxmenux/managed_installs.json 2>/dev/null | head -1 || true)"
+    fi
+
+    gpu_array="$(jq --argjson acc "$gpu_array" \
+        --arg vendor "$vendor" \
+        --arg model "$model" \
+        --arg pci_address "$pci_address" \
+        --arg pci_id "$pci_id" \
+        --arg kernel_driver "$kernel_driver" \
+        --argjson passthrough_eligible "$passthrough_eligible" \
+        --arg proxmenux_installer "$proxmenux_installer" \
+        --arg installed_driver_version "$installed_driver_version" \
+        -n '
+            $acc + [{
+                vendor: $vendor,
+                model: $model,
+                pci_address: $pci_address,
+                pci_id: $pci_id,
+                kernel_driver: (if $kernel_driver == "" then null else $kernel_driver end),
+                passthrough_eligible: $passthrough_eligible,
+                proxmenux_installer: (if $proxmenux_installer == "" then null else $proxmenux_installer end),
+                installed_driver_version: (if $installed_driver_version == "" then null else $installed_driver_version end)
+            }]
+        ')"
+done < <(lspci -nnD 2>/dev/null | grep -E 'VGA compatible|3D controller|Display controller' || true)
+
+# ── TPUs (Google Coral) ──
+# PCIe variant: vendor 1ac1 (Global Unichip Corp) is the Coral M.2 / mPCIe.
+# USB variant: vendor 18d1 product 9302 (Google).
+tpu_array='[]'
+
+# PCIe Coral
+# Only the PCI address is recorded; the vendor:device ID is not part of the
+# emitted object, so it is not extracted here.
+while IFS= read -r line; do
+    [[ -z "$line" ]] && continue
+    pci_address="$(printf '%s' "$line" | awk '{print $1}')"
+    tpu_array="$(jq --argjson acc "$tpu_array" \
+        --arg model "Coral PCIe" \
+        --arg pci_address "$pci_address" \
+        -n '
+            $acc + [{
+                vendor: "Google",
+                model: $model,
+                bus: "PCIe",
+                pci_address: $pci_address,
+                proxmenux_installer: "scripts/gpu_tpu/install_coral.sh",
+                installed_version: null
+            }]
+        ')"
+done < <(lspci -nnD 2>/dev/null | grep -iE '1ac1:|global unichip' || true)
+
+# USB Coral
+# The USB Accelerator enumerates as 1a6e:089a (Global Unichip) until the
+# Edge TPU runtime has uploaded its firmware, and as 18d1:9302 (Google)
+# afterwards — match both so an idle device is still inventoried.
+if command -v lsusb >/dev/null 2>&1; then
+    # Capture the listing first: with pipefail, `lsusb | grep -q` can report
+    # failure when grep exits on the first match and lsusb dies of SIGPIPE.
+    usb_devices="$(lsusb 2>/dev/null || true)"
+    if grep -qE '18d1:9302|1a6e:089a|Google.*Coral' <<<"$usb_devices"; then
+        tpu_array="$(jq --argjson acc "$tpu_array" \
+            -n '
+                $acc + [{
+                    vendor: "Google",
+                    model: "Coral USB",
+                    bus: "USB",
+                    pci_address: null,
+                    proxmenux_installer: "scripts/gpu_tpu/install_coral.sh",
+                    installed_version: null
+                }]
+            ')"
+    fi
+fi
+
+# ── NICs ──
+# We want PHYSICAL interfaces (skip lo, veth*, tap*, fwln*, fwbr*, fwpr*).
+# Also distinguish wired from wireless.
+nic_array='[]'
+wireless_array='[]'
+
+# Map each interface → its bridge by walking /sys/class/net/BRIDGE/brif/.
+# We use bash glob expansion instead of `find -path` because find doesn't
+# follow the symlinks under /sys cleanly.
+declare -A bridge_for
+for brif_dir in /sys/class/net/*/brif; do
+    [[ -d "$brif_dir" ]] || continue
+    bridge="$(basename "$(dirname "$brif_dir")")"
+    for member_link in "$brif_dir"/*; do
+        [[ -e "$member_link" ]] || continue
+        member="$(basename "$member_link")"
+        bridge_for["$member"]="$bridge"
+    done
+done
+
+# Iterate over each physical net device
+for dev_path in /sys/class/net/*; do
+    ifname="$(basename "$dev_path")"
+    case "$ifname" in
+        lo|veth*|tap*|fwln*|fwbr*|fwpr*|vmbr*|bond*) continue ;;
+    esac
+    # Bridges and bonds we record as their own thing; PHY interfaces only here.
+    # Detect virtual interfaces (no device symlink → virtual)
+    [[ ! -e "$dev_path/device" ]] && continue
+
+    mac="$(cat "$dev_path/address" 2>/dev/null || echo "")"
+    [[ -z "$mac" ]] && continue
+    operstate="$(cat "$dev_path/operstate" 2>/dev/null | tr '[:lower:]' '[:upper:]' || echo "UNKNOWN")"
+    # Anything other than UP/DOWN (dormant, lowerlayerdown, …) → UNKNOWN.
+    case "$operstate" in
+        UP|DOWN) ;;
+        *) operstate="UNKNOWN" ;;
+    esac
+    kernel_driver="$(basename "$(readlink "$dev_path/device/driver" 2>/dev/null || echo "")")"
+
+    # Wireless detection
+    if [[ -d "$dev_path/wireless" ]] || [[ -d "$dev_path/phy80211" ]]; then
+        wireless_array="$(jq --argjson acc "$wireless_array" \
+            --arg ifname "$ifname" \
+            --arg mac "$mac" \
+            -n '$acc + [{ifname: $ifname, mac: $mac}]')"
+        continue
+    fi
+
+    # Bridge membership: which vmbr* contains this NIC?
+    in_bridges_json='[]'
+    if [[ -n "${bridge_for[$ifname]:-}" ]]; then
+        in_bridges_json="$(jq -n --arg b "${bridge_for[$ifname]}" '[$b]')"
+    fi
+
+    nic_array="$(jq --argjson acc "$nic_array" \
+        --arg ifname "$ifname" \
+        --arg mac "$mac" \
+        --arg kernel_driver "$kernel_driver" \
+        --argjson in_bridges "$in_bridges_json" \
+        --arg operstate "$operstate" \
+        -n '
+            $acc + [{
+                ifname: $ifname,
+                mac: $mac,
+                kernel_driver: (if $kernel_driver == "" then null else $kernel_driver end),
+                in_bridges: $in_bridges,
+                operstate: $operstate
+            }]
+        ')"
+done
+
+# Compose the final object
+jq -n \
+    --argjson gpu "$gpu_array" \
+    --argjson tpu "$tpu_array" \
+    --argjson nic "$nic_array" \
+    --argjson wireless "$wireless_array" \
+    '{ gpu: $gpu, tpu: $tpu, nic: $nic, wireless: $wireless }'
diff --git a/scripts/backup_restore/collectors/collect_kernel.sh b/scripts/backup_restore/collectors/collect_kernel.sh
new file mode 100644
index 00000000..0648ef37
--- /dev/null
+++ b/scripts/backup_restore/collectors/collect_kernel.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# ==========================================================
+# ProxMenux backup manifest collector — kernel_params
+# 
==========================================================
+# /proc/cmdline (filtered to user-meaningful extras), /etc/modules,
+# and /etc/modprobe.d/ files with custom directives. Read-only.
+# Schema: scripts/backup_restore/schema/manifest.schema.json
+# ==========================================================
+set -euo pipefail
+
+# ── cmdline_extra ──
+# /proc/cmdline contains the kernel command line the bootloader passed.
+# We strip the boring boilerplate (BOOT_IMAGE, initrd, root, ro, rw, quiet,
+# splash, boot=zfs, rootflags) so the manifest captures only the user-
+# meaningful tweaks (intel_iommu, iommu=pt, hugepages, pcie_acs_override,
+# acpi=off, etc.). These are the bits a restore wizard cares about.
+cmdline_extra='[]'
+if [[ -r /proc/cmdline ]]; then
+    raw_cmdline="$(cat /proc/cmdline)"
+    # Word-split on whitespace with pathname expansion switched off, so a
+    # token containing *, ? or [ is never replaced by matching file names
+    # from the current directory.
+    set -f
+    for token in $raw_cmdline; do
+        case "$token" in
+            BOOT_IMAGE=*|initrd=*|root=*|ro|rw|quiet|splash|boot=*|rootflags=*)
+                ;; # boilerplate, drop
+            *)
+                cmdline_extra="$(jq --argjson acc "$cmdline_extra" --arg t "$token" -n '$acc + [$t]')"
+                ;;
+        esac
+    done
+    set +f
+fi
+
+# ── modules_loaded_at_boot ──
+# /etc/modules lists modules systemd-modules-load.service inserts on boot.
+modules_at_boot='[]'
+if [[ -r /etc/modules ]]; then
+    # `|| [[ -n "$mod" ]]` keeps a final line that lacks a trailing newline.
+    while IFS= read -r mod || [[ -n "$mod" ]]; do
+        # Strip comments and inline comments
+        mod="${mod%%#*}"
+        # Trim and collapse whitespace. awk rather than xargs: xargs parses
+        # quotes/backslashes and exits non-zero on an unmatched quote, which
+        # under `set -euo pipefail` would abort the collector.
+        mod="$(printf '%s' "$mod" | awk '{$1=$1; print}')"
+        [[ -z "$mod" ]] && continue
+        modules_at_boot="$(jq --argjson acc "$modules_at_boot" --arg m "$mod" -n '$acc + [$m]')"
+    done < /etc/modules
+fi
+
+# ── modprobe_d_files ──
+# /etc/modprobe.d/*.conf files. We emit the path of every file that
+# contains at least one `options`, `blacklist`, `install`, `alias`, or
+# `softdep` directive — i.e. anything that has actual effect. Files that
+# are empty or pure comments aren't worth tracking.
+modprobe_files='[]'
+if [[ -d /etc/modprobe.d ]]; then
+    for f in /etc/modprobe.d/*.conf; do
+        # Also skips the literal pattern when the glob matches nothing.
+        [[ -r "$f" ]] || continue
+        # `remove` is matched as well: like `install`, it overrides what
+        # modprobe runs for a module, so a file carrying only `remove`
+        # lines still changes host behaviour.
+        if grep -qE '^[[:space:]]*(options|blacklist|install|remove|alias|softdep)[[:space:]]' "$f" 2>/dev/null; then
+            modprobe_files="$(jq --argjson acc "$modprobe_files" --arg p "$f" -n '$acc + [$p]')"
+        fi
+    done
+fi
+
+# Final output of this collector: one JSON object on stdout.
+jq -n \
+    --argjson cmdline_extra "$cmdline_extra" \
+    --argjson modules_loaded "$modules_at_boot" \
+    --argjson modprobe_files "$modprobe_files" \
+    '{
+        cmdline_extra: $cmdline_extra,
+        modules_loaded_at_boot: $modules_loaded,
+        modprobe_d_files: $modprobe_files
+    }'
diff --git a/scripts/backup_restore/collectors/collect_proxmenux_state.sh b/scripts/backup_restore/collectors/collect_proxmenux_state.sh
new file mode 100644
index 00000000..224335df
--- /dev/null
+++ b/scripts/backup_restore/collectors/collect_proxmenux_state.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+# ==========================================================
+# ProxMenux backup manifest collector — proxmenux_installed_components
+# ==========================================================
+# Reads ProxMenux's managed_installs registry + post-install
+# tools marker file and emits the installed components array.
+# Read-only. Schema:
+# scripts/backup_restore/schema/manifest.schema.json
+# ==========================================================
+set -euo pipefail
+
+REGISTRY="/usr/local/share/proxmenux/managed_installs.json"
+INSTALLED_TOOLS="/usr/local/share/proxmenux/installed_tools.json"
+
+components='[]'
+
+# ── managed_installs registry ──
+# Each entry already carries the installer path under `menu_script`,
+# so we trust the registry as the single source of truth. We skip LXC
+# entries because containers are restored via vzdump, not via the
+# host-config restore path.
+if [[ -r "$REGISTRY" ]]; then
+    # The feeding jq emits one compact JSON object per line: every registry
+    # item that has not been removed and is not of type "lxc". A missing or
+    # malformed registry yields no lines (`.items[]?` + `|| true`).
+    while IFS= read -r item; do
+        [[ -z "$item" ]] && continue
+        # `jq -r` prints raw strings; absent type/version become "" and are
+        # mapped to null when the component object is built below.
+        id="$(printf '%s' "$item" | jq -r '.id')"
+        type="$(printf '%s' "$item" | jq -r '.type // ""')"
+        version="$(printf '%s' "$item" | jq -r '.current_version // ""')"
+        # menu_script in the registry is null for components that handle their
+        # own update lifecycle (e.g. OCI apps via the secure-gateway runtime).
+        # We keep that null forward: restore won't try to reinstall those —
+        # the user reconfigures them after restore.
+        installer="$(printf '%s' "$item" | jq -r '.menu_script // ""')"
+
+        components="$(jq --argjson acc "$components" \
+            --arg id "$id" --arg type "$type" --arg version "$version" --arg installer "$installer" \
+            -n '
+                $acc + [{
+                    id: $id,
+                    type: $type,
+                    version_at_backup: (if $version == "" then null else $version end),
+                    proxmenux_installer: (if $installer == "" then null else $installer end),
+                    applied_settings: []
+                }]
+            ')"
+    done < <(jq -c '.items[]? | select(.removed_at == null) | select(.type != "lxc")' "$REGISTRY" 2>/dev/null || true)
+fi
+
+# ── installed_tools.json (post-install optimizations) ──
+# Format: array of {name: ..., installed_at: ...} or similar. The exact
+# shape varies across ProxMenux versions; we emit one synthetic component
+# named "post_install_optimizations" with the applied_settings list.
+if [[ -r "$INSTALLED_TOOLS" ]]; then
+    # Accepted shapes: an object holding the list under .tools or .installed,
+    # or a bare array. Each element contributes its .name, else its .id, else
+    # its string form. Any jq failure degrades to an empty list.
+    applied_settings="$(jq -c '
+        if type == "object" then
+            (.tools // .installed // [] | map(.name // .id // tostring))
+        elif type == "array" then
+            map(.name // .id // tostring)
+        else []
+        end
+    ' "$INSTALLED_TOOLS" 2>/dev/null || echo '[]')"
+
+    # Only emit if we have at least one applied setting — otherwise the
+    # component would be noise.
+    count="$(printf '%s' "$applied_settings" | jq 'length' 2>/dev/null || echo 0)"
+    if [[ "${count:-0}" -gt 0 ]]; then
+        components="$(jq --argjson acc "$components" --argjson s "$applied_settings" \
+            -n '
+                $acc + [{
+                    id: "post_install_optimizations",
+                    type: "proxmenux_post_install",
+                    version_at_backup: null,
+                    proxmenux_installer: "scripts/post_install/customizable_post_install.sh",
+                    applied_settings: $s
+                }]
+            ')"
+    fi
+fi
+
+# Output: bare array (not wrapped in an object — the orchestrator places
+# this under .proxmenux_installed_components).
+printf '%s\n' "$components"
diff --git a/scripts/backup_restore/collectors/collect_source_host.sh b/scripts/backup_restore/collectors/collect_source_host.sh
new file mode 100644
index 00000000..e105a9b5
--- /dev/null
+++ b/scripts/backup_restore/collectors/collect_source_host.sh
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+# ==========================================================
+# ProxMenux backup manifest collector — source_host
+# ==========================================================
+# Emits the `source_host` section of the manifest as JSON to
+# stdout. Read-only; no side effects. Schema:
+# scripts/backup_restore/schema/manifest.schema.json
+# ==========================================================
+set -euo pipefail
+
+# ── pve_version_full / pve_version ──
+# pveversion's first line is like:
+#   pve-manager/9.2.2/b9984c6d90a4bd80 (running kernel: 7.0.2-6-pve)
+pve_version_full=""
+pve_version=""
+if command -v pveversion >/dev/null 2>&1; then
+    pve_version_full="$(pveversion 2>/dev/null | head -1 || true)"
+    # Extract the X.Y.Z between "pve-manager/" and "/"
+    pve_version="$(printf '%s\n' "$pve_version_full" | sed -nE 's@^pve-manager/([0-9.]+)/.*@\1@p')"
+fi
+
+# ── pbs_version ──
+# PBS is a separate package. If proxmox-backup-manager exists, host has PBS role.
+pbs_version=""
+if command -v proxmox-backup-manager >/dev/null 2>&1; then
+    pbs_version="$(proxmox-backup-manager versions 2>/dev/null | awk '/^proxmox-backup-server/{print $2; exit}' || true)"
+fi
+
+# ── roles ──
+# A role is claimed only when the matching version string was obtained.
+roles_json='[]'
+if [[ -n "$pve_version" && -n "$pbs_version" ]]; then
+    roles_json='["pve","pbs"]'
+elif [[ -n "$pve_version" ]]; then
+    roles_json='["pve"]'
+elif [[ -n "$pbs_version" ]]; then
+    roles_json='["pbs"]'
+else
+    # No PVE, no PBS — keep the empty-array sentinel (the script does NOT
+    # exit here). Caller decides whether to abort or generate a
+    # system-only manifest.
+    roles_json='[]'
+fi
+
+# ── kernel, boot_mode, root_fs ──
+kernel="$(uname -r)"
+if [[ -d /sys/firmware/efi ]]; then
+    boot_mode="efi"
+else
+    boot_mode="bios"
+fi
+root_fs="$(findmnt -no FSTYPE / 2>/dev/null || echo ext4)"
+
+# ── CPU model / arch ──
+# `|| true`: under `set -euo pipefail` a missing or failing lscpu would
+# otherwise abort the collector; the model is simply left empty instead.
+cpu_model="$(lscpu 2>/dev/null | awk -F: '/^Model name/{sub(/^[ \t]+/, "", $2); print $2; exit}' || true)"
+cpu_arch="$(uname -m)"
+# Normalize to schema enum
+case "$cpu_arch" in
+    x86_64|amd64) cpu_arch="x86_64" ;;
+    aarch64|arm64) cpu_arch="aarch64" ;;
+esac
+
+# ── memory_kb ──
+memory_kb="$(awk '/^MemTotal:/{print $2; exit}' /proc/meminfo 2>/dev/null || echo 0)"
+# The value is later passed via --argjson and must be a JSON number; fall
+# back to 0 when MemTotal was not found or is not a plain integer.
+[[ "$memory_kb" =~ ^[0-9]+$ ]] || memory_kb=0
+
+# ── subscription_status ──
+subscription_status=""
+if command -v pvesubscription >/dev/null 2>&1; then
+    # `|| true`: a failing `pvesubscription get` leaves the status empty
+    # (emitted as null) rather than terminating the script via pipefail.
+    subscription_status="$(pvesubscription get 2>/dev/null | awk -F: '/^status:/{sub(/^[ \t]+/,"",$2); print $2; exit}' || true)"
+fi
+
+# Build JSON. Use --arg for strings (always quoted), --argjson for
+# numbers/arrays/null. Empty strings → null per schema convention.
+jq -n \
+    --arg hostname "$(hostname)" \
+    --arg pve_version "$pve_version" \
+    --arg pve_version_full "$pve_version_full" \
+    --arg pbs_version "$pbs_version" \
+    --argjson roles "$roles_json" \
+    --arg kernel "$kernel" \
+    --arg boot_mode "$boot_mode" \
+    --arg root_fs "$root_fs" \
+    --arg cpu_model "$cpu_model" \
+    --arg cpu_arch "$cpu_arch" \
+    --argjson memory_kb "$memory_kb" \
+    --arg subscription_status "$subscription_status" \
+    '{
+        hostname: $hostname,
+        pve_version: (if $pve_version == "" then null else $pve_version end),
+        pve_version_full: (if $pve_version_full == "" then null else $pve_version_full end),
+        pbs_version: (if $pbs_version == "" then null else $pbs_version end),
+        roles: $roles,
+        kernel: $kernel,
+        boot_mode: $boot_mode,
+        root_fs: $root_fs,
+        cpu_model: $cpu_model,
+        cpu_arch: $cpu_arch,
+        memory_kb: $memory_kb,
+        subscription_status: (if $subscription_status == "" then null else $subscription_status end)
+    }'
diff --git a/scripts/backup_restore/collectors/collect_storage.sh b/scripts/backup_restore/collectors/collect_storage.sh
new file mode 100644
index 00000000..c2e8dbdf
--- /dev/null
+++ b/scripts/backup_restore/collectors/collect_storage.sh
@@ -0,0 +1,252 @@
+#!/usr/bin/env bash
+# ==========================================================
+# ProxMenux backup manifest collector — storage_inventory
+# ==========================================================
+# ZFS pools (with stable by-id devices), LVM VGs + thin pools,
+# physical disks, PVE storage.cfg, and external mounts.
+# Read-only. Schema:
+# scripts/backup_restore/schema/manifest.schema.json
+# ==========================================================
+set -euo pipefail
+
+# ── ZFS pools ──
+zfs_pools='[]'
+if command -v zpool >/dev/null 2>&1; then
+    while IFS= read -r pool; do
+        [[ -z "$pool" ]] && continue
+        # type: parse zpool status — first vdev line after 'config:' header.
+        # Single-device pool shows the device directly; mirror/raidz prefix the
+        # vdev type. We look at the indented children list.
+        pool_type="single"
+        devices='[]'
+        # `zpool status -P` outputs full /dev/disk/by-id/... paths for the
+        # member disks. We isolate the first whitespace-delimited token on
+        # each child line and decide:
+        #   - vdev type lines (mirror-0, raidz1-0, stripe, ...) → pool type
+        #   - leaf device lines (/dev/disk/by-id/* or /dev/sd*) → membership
+        while IFS= read -r vdev_line; do
+            token="$(printf '%s' "$vdev_line" | awk '{print $1}')"
+            [[ -z "$token" || "$token" == "NAME" || "$token" == "$pool" ]] && continue
+            case "$token" in
+                mirror-*) pool_type="mirror" ;;
+                raidz1-*) pool_type="raidz1" ;;
+                raidz2-*) pool_type="raidz2" ;;
+                raidz3-*) pool_type="raidz3" ;;
+                stripe-*) pool_type="stripe" ;;
+                /dev/disk/by-id/*)
+                    # Strip the /dev/disk/by-id/ prefix for the schema field;
+                    # leave any -partN suffix in place — the restore wizard uses
+                    # the exact same string to look the disk back up.
+                    dev_name="${token#/dev/disk/by-id/}"
+                    devices="$(jq --argjson acc "$devices" --arg d "$dev_name" -n '$acc + [$d]')"
+                    ;;
+                /dev/*)
+                    # Fallback: ZFS pool created with raw /dev/sdX paths. Record
+                    # them as-is; restore will need to remap manually.
+                    devices="$(jq --argjson acc "$devices" --arg d "$token" -n '$acc + [$d]')"
+                    ;;
+            esac
+        done < <(zpool status -P "$pool" 2>/dev/null | awk '/^config:/{flag=1; next} /^errors:/{flag=0} flag')
+
+        size_bytes="$(zpool list -H -p -o size "$pool" 2>/dev/null || echo 0)"
+        health="$(zpool list -H -o health "$pool" 2>/dev/null || echo UNKNOWN)"
+        compression="$(zfs get -H -o value compression "$pool" 2>/dev/null || echo "")"
+        mountpoint="$(zfs get -H -o value mountpoint "$pool" 2>/dev/null || echo "")"
+
+        zfs_pools="$(jq --argjson acc "$zfs_pools" \
+            --arg name "$pool" \
+            --arg type "$pool_type" \
+            --argjson devices "$devices" \
+            --arg mountpoint "$mountpoint" \
+            --arg compression "$compression" \
+            --argjson size_bytes "${size_bytes:-0}" \
+            --arg health "$health" \
+            -n '
+                $acc + [{
+                    name: $name,
+                    type: $type,
+                    devices_by_id: $devices,
+                    mountpoint: $mountpoint,
+                    compression: $compression,
+                    size_bytes: $size_bytes,
+                    health: $health
+                }]
+            ')"
+    done < <(zpool list -H -o name 2>/dev/null || true)
+fi
+
+# ── LVM VGs + thin pools ──
+lvm_vgs='[]'
+if command -v vgs >/dev/null 2>&1; then
+    # vgs --reportformat json --units b is reliable in lvm2 ≥ 2.02
+    vg_json="$(vgs --reportformat json --units b --noheadings -o vg_name,vg_size 2>/dev/null || echo '{}')"
+    while IFS= read -r vg_name; do
+        [[ -z "$vg_name" || "$vg_name" == "null" ]] && continue
+        # Sizes come back with a trailing unit letter (--units b); strip it so
+        # the value can be passed to jq as a number.
+        vg_size="$(printf '%s' "$vg_json" | jq -r --arg n "$vg_name" '.report[0].vg[]? | select(.vg_name == $n) | .vg_size' | sed 's/[Bb]$//' | head -1)"
+        # Thin pools in this VG
+        thin_pools='[]'
+        while IFS= read -r lv_line; do
+            [[ -z "$lv_line" ]] && continue
+            lv_name="$(printf '%s' "$lv_line" | awk '{print $1}')"
+            lv_size="$(printf '%s' "$lv_line" | awk '{print $2}' | sed 's/[Bb]$//')"
+            thin_pools="$(jq --argjson acc "$thin_pools" \
+                --arg n "$lv_name" --argjson s "${lv_size:-0}" \
+                -n '$acc + [{lv_name: $n, size_bytes: $s}]')"
+        done < <(lvs --noheadings --units b -o lv_name,lv_size --select "vg_name=$vg_name && lv_attr=~^t" 2>/dev/null || true)
+
+        lvm_vgs="$(jq --argjson acc "$lvm_vgs" \
+            --arg n "$vg_name" --argjson s "${vg_size:-0}" --argjson tp "$thin_pools" \
+            -n '$acc + [{name: $n, size_bytes: $s, thin_pools: $tp}]')"
+    done < <(printf '%s' "$vg_json" | jq -r '.report[0].vg[]?.vg_name' 2>/dev/null || true)
+fi
+
+# ── Physical disks (by-id resolution) ──
+physical_disks='[]'
+# Build name → by-id map by walking /dev/disk/by-id/. A single block
+# device usually has multiple by-id symlinks (ata-*, wwn-*, scsi-*, …).
+# We prefer the most human-readable identifier in this order:
+#   ata-* → nvme-* → scsi-* → usb-* → wwn-*
+# This also makes the manifest consistent with what `zpool status -P`
+# reports (zpool defaults to ata-* / wwn-* depending on bus).
+declare -A by_id_for
+declare -A by_id_priority_for
+# priority_for_id BY_ID_NAME — prints the preference rank (lower wins).
+priority_for_id() {
+    case "$1" in
+        ata-*) echo 1 ;;
+        nvme-*) echo 2 ;;
+        scsi-*) echo 3 ;;
+        usb-*) echo 4 ;;
+        wwn-*) echo 5 ;;
+        *) echo 9 ;;
+    esac
+}
+if [[ -d /dev/disk/by-id ]]; then
+    for link in /dev/disk/by-id/*; do
+        [[ -L "$link" ]] || continue
+        by_id="$(basename "$link")"
+        # Skip partition symlinks — we want whole-disk only. Anchor on the
+        # udev "-partN" suffix so a whole-disk ID that merely contains
+        # "-part" somewhere in its model/serial is not discarded.
+        [[ "$by_id" =~ -part[0-9]+$ ]] && continue
+        target="$(basename "$(readlink -f "$link")")"
+        [[ -z "$target" ]] && continue
+        new_prio="$(priority_for_id "$by_id")"
+        cur_prio="${by_id_priority_for[$target]:-99}"
+        if (( new_prio < cur_prio )); then
+            by_id_for["$target"]="$by_id"
+            by_id_priority_for["$target"]="$new_prio"
+        fi
+    done
+fi
+
+# lsblk -d -b -J for whole disks
+lsblk_json="$(lsblk -d -b -o NAME,MODEL,SIZE,TYPE -J 2>/dev/null || echo '{}')"
+while IFS= read -r disk_line; do
+    [[ -z "$disk_line" ]] && continue
+    name="$(printf '%s' "$disk_line" | jq -r '.name')"
+    model="$(printf '%s' "$disk_line" | jq -r '.model // ""')"
+    size="$(printf '%s' "$disk_line" | jq -r '.size // 0')"
+    type="$(printf '%s' "$disk_line" | jq -r '.type')"
+    # Only PHYSICAL disks.
+    #   - skip non-disk types (rom, loop)
+    #   - skip zd* (ZFS zvols backing VMs)
+    #   - skip dm-* (LVM-mapped devices)
+    #   - skip loop* (defensive — type filter usually catches it)
+    [[ "$type" != "disk" ]] && continue
+    case "$name" in
+        zd*|dm-*|loop*) continue ;;
+    esac
+    by_id="${by_id_for[$name]:-}"
+    physical_disks="$(jq --argjson acc "$physical_disks" \
+        --arg n "$name" --arg m "$model" --argjson s "${size:-0}" --arg bid "$by_id" \
+        -n '
+            $acc + [{
+                name: $n,
+                model: (if $m == "" then null else $m end),
+                size_bytes: $s,
+                by_id: (if $bid == "" then null else $bid end)
+            }]
+        ')"
+done < <(printf '%s' "$lsblk_json" | jq -c '.blockdevices[]?' 2>/dev/null || true)
+
+# ── PVE storage.cfg ──
+# Format is whitespace-key-value with blank-line separators:
+#   TYPE: ID
+#       key value
+#       key value
+pve_storage='[]'
+if [[ -r /etc/pve/storage.cfg ]]; then
+    current_type=""; current_id=""; current_extra='{}'
+    # flush — append the stanza collected so far (if any) to $pve_storage
+    # and reset the accumulator variables for the next stanza.
+    flush() {
+        if [[ -n "$current_id" ]]; then
+            pve_storage="$(jq --argjson acc "$pve_storage" \
+                --arg id "$current_id" --arg t "$current_type" --argjson e "$current_extra" \
+                -n '$acc + [(($e) + {id: $id, type: $t})]')"
+        fi
+        current_type=""; current_id=""; current_extra='{}'
+    }
+    while IFS= read -r line; do
+        if [[ -z "${line// }" ]]; then
+            flush; continue
+        fi
+        if [[ "$line" =~ ^([a-z]+):[[:space:]]+([A-Za-z0-9_.-]+) ]]; then
+            flush
+            current_type="${BASH_REMATCH[1]}"
+            current_id="${BASH_REMATCH[2]}"
+        # Property names may contain hyphens and digits (prune-backups,
+        # content-dirs, max-protected-backups, …); a letters/underscore-only
+        # pattern would silently drop those lines from the manifest.
+        elif [[ "$line" =~ ^[[:space:]]+([A-Za-z0-9_-]+)[[:space:]]+(.*)$ ]]; then
+            key="${BASH_REMATCH[1]}"
+            val="${BASH_REMATCH[2]}"
+            case "$key" in
+                # `content` is a comma-separated list — split into JSON array
+                content)
+                    content_array="$(printf '%s\n' "$val" | tr ',' '\n' | jq -R . | jq -s .)"
+                    current_extra="$(jq --argjson e "$current_extra" --argjson c "$content_array" -n '$e + {content: $c}')"
+                    ;;
+                *)
+                    current_extra="$(jq --argjson e "$current_extra" --arg k "$key" --arg v "$val" -n '$e + {($k): $v}')"
+                    ;;
+            esac
+        fi
+    done < /etc/pve/storage.cfg
+    # The last stanza has no trailing blank line to trigger a flush.
+    flush
+fi
+
+# ── External mounts (NFS/CIFS/etc.) ──
+# Filter on filesystem types we care about for the manifest. Drop FUSE
+# pmxcfs (/etc/pve), tmpfs, devtmpfs, autofs, ZFS internals already
+# accounted for. NFS, CIFS, ISO mount points are the interesting ones.
+mounts='[]'
+if command -v findmnt >/dev/null 2>&1; then
+    # The feeding pipeline emits one compact JSON object per line: every
+    # object in findmnt's JSON output that carries a `target` key, minus
+    # lines mentioning the /etc/pve target.
+    while IFS= read -r mline; do
+        [[ -z "$mline" ]] && continue
+        target="$(printf '%s' "$mline" | jq -r '.target')"
+        source="$(printf '%s' "$mline" | jq -r '.source')"
+        fstype="$(printf '%s' "$mline" | jq -r '.fstype')"
+        options="$(printf '%s' "$mline" | jq -r '.options // ""')"
+        mounts="$(jq --argjson acc "$mounts" \
+            --arg t "$target" --arg s "$source" --arg f "$fstype" --arg o "$options" \
+            -n '
+                $acc + [{
+                    target: $t,
+                    source: $s,
+                    fstype: $f,
+                    options: (if $o == "" then null else $o end)
+                }]
+            ')"
+    done < <(findmnt -t nfs,nfs4,cifs,smbfs,fuseblk,fuse.glusterfs -J 2>/dev/null \
+        | jq -c '.. | objects | select(.target?)' 2>/dev/null \
+        | grep -vE '"target":"/etc/pve"' || true)
+fi
+
+# Compose
+# Final output of this collector: one JSON object on stdout.
+jq -n \
+    --argjson zfs_pools "$zfs_pools" \
+    --argjson lvm_vgs "$lvm_vgs" \
+    --argjson physical_disks "$physical_disks" \
+    --argjson pve_storage "$pve_storage" \
+    --argjson mounts "$mounts" \
+    '{
+        zfs_pools: $zfs_pools,
+        lvm: { vgs: $lvm_vgs },
+        physical_disks: $physical_disks,
+        pve_storage_cfg: $pve_storage,
+        mounts: $mounts
+    }'
diff --git a/scripts/backup_restore/lib_host_backup_common.sh b/scripts/backup_restore/lib_host_backup_common.sh
index fabc9394..444b3bb8 100644
--- a/scripts/backup_restore/lib_host_backup_common.sh
+++ b/scripts/backup_restore/lib_host_backup_common.sh
@@ -42,30 +42,56 @@ HB_UI_YESNO_W=78
 # DEFAULT PROFILE PATHS
 # ==========================================================
 hb_default_profile_paths() {
+    # Curated list of paths that matter for a real Proxmox restore
+    # on a fresh host. Anything missing on the source is just
+    # noted in metadata/missing_paths.txt — no error. Grouped by
+    # category so it's easy to spot what's covered.
local paths=( + # ── PVE core ────────────────────────────────────────── "/etc/pve" - "/etc/network" - "/etc/hosts" + "/var/lib/pve-cluster" + "/etc/vzdump.conf" + + # ── Host identity & networking ──────────────────────── "/etc/hostname" + "/etc/hosts" + "/etc/timezone" + "/etc/resolv.conf" + "/etc/network" + + # ── Access & auth ───────────────────────────────────── "/etc/ssh" - "/etc/systemd/system" + "/etc/sudoers" + "/etc/sudoers.d" + "/etc/pam.d" + "/etc/security" + + # ── Kernel / boot / hardware ────────────────────────── + "/etc/default/grub" + "/etc/kernel" "/etc/modules" "/etc/modules-load.d" "/etc/modprobe.d" + "/etc/sysctl.conf" + "/etc/sysctl.d" "/etc/udev/rules.d" - "/etc/default/grub" "/etc/fstab" - "/etc/kernel" - "/etc/apt" - "/etc/vzdump.conf" - "/etc/postfix" - "/etc/resolv.conf" - "/etc/timezone" "/etc/iscsi" "/etc/multipath" - "/usr/local/bin" - "/usr/local/share/proxmenux" - "/root" + + # ── Shell / locale / env ────────────────────────────── + "/etc/environment" + "/etc/bash.bashrc" + "/etc/inputrc" + "/etc/profile" + "/etc/profile.d" # figurine and other shell add-ons live here + "/etc/locale.gen" + "/etc/locale.conf" + + # ── Packaging ───────────────────────────────────────── + "/etc/apt" + + # ── Cron ────────────────────────────────────────────── "/etc/cron.d" "/etc/cron.daily" "/etc/cron.hourly" @@ -74,8 +100,35 @@ hb_default_profile_paths() { "/etc/cron.allow" "/etc/cron.deny" "/var/spool/cron/crontabs" - "/var/lib/pve-cluster" + + # ── Common Proxmox tooling (skipped if not present) ── + "/etc/systemd/system" # custom units (including log2ram.service if installed) + "/etc/log2ram.conf" + "/etc/lm-sensors" + "/etc/sensors3.conf" + "/etc/fail2ban" + "/etc/snmp" + "/etc/postfix" + + # ── Monitoring / VPN (skipped if not present) ──────── + "/etc/wireguard" + "/etc/openvpn" + "/etc/grafana" + "/etc/influxdb" + "/etc/prometheus" + "/etc/telegraf" + "/etc/zabbix" + + # ── ProxMenux-installed binaries & app state ───────── + 
"/usr/local/bin" + "/usr/local/sbin" + "/usr/local/share/proxmenux" + + # ── Root home (rsync excludes volatile dirs) ───────── + "/root" ) + # ZFS state only when the host runs ZFS — same convention + # used pre-expansion. if [[ -d /etc/zfs ]] || command -v zpool >/dev/null 2>&1; then paths+=("/etc/zfs") fi @@ -231,6 +284,18 @@ hb_prepare_staging() { command -v pct >/dev/null 2>&1 && pct list > "$meta/pct-list.txt" 2>&1 || true command -v zpool >/dev/null 2>&1 && zpool status > "$meta/zpool.txt" 2>&1 || true + # Package inventory — captures what's installed on the source + # host so the restore flow can offer to reinstall missing user + # packages on the target. Solves the "config restored but the + # binary is missing, service hangs at boot" class of issues + # (log2ram, figurine, sensors etc. installed by post-install). + if command -v dpkg >/dev/null 2>&1; then + dpkg --get-selections > "$meta/packages.list" 2>/dev/null || true + fi + if command -v apt-mark >/dev/null 2>&1; then + apt-mark showmanual > "$meta/packages.manual.list" 2>/dev/null || true + fi + # Manifest + checksums ( cd "$staging_root/rootfs" || return 1 @@ -269,9 +334,10 @@ hb_collect_pbs_configs() { HB_PBS_REPOS=() HB_PBS_SECRETS=() HB_PBS_SOURCES=() + HB_PBS_FINGERPRINTS=() if [[ -f /etc/pve/storage.cfg ]]; then - local current="" server="" datastore="" username="" pw_file pw_val + local current="" server="" datastore="" username="" fingerprint="" pw_file pw_val while IFS= read -r line; do line="${line%%#*}" line="${line#"${line%%[![:space:]]*}"}" @@ -285,12 +351,20 @@ hb_collect_pbs_configs() { HB_PBS_REPOS+=("${username}@${server}:${datastore}") HB_PBS_SECRETS+=("$pw_val") HB_PBS_SOURCES+=("proxmox") + HB_PBS_FINGERPRINTS+=("$fingerprint") fi - current="${BASH_REMATCH[1]}"; server="" datastore="" username="" + current="${BASH_REMATCH[1]}"; server="" datastore="" username="" fingerprint="" elif [[ -n "$current" ]]; then - [[ $line =~ ^[[:space:]]+server[[:space:]]+(.+)$ ]] && 
server="${BASH_REMATCH[1]}" - [[ $line =~ ^[[:space:]]+datastore[[:space:]]+(.+)$ ]] && datastore="${BASH_REMATCH[1]}" - [[ $line =~ ^[[:space:]]+username[[:space:]]+(.+)$ ]] && username="${BASH_REMATCH[1]}" + # The line was already trimmed of leading/trailing + # whitespace above. Match the field name directly at + # the start of the (post-trim) line — the old regex + # demanded leading whitespace that the trim had + # already stripped, so the sub-fields were silently + # never captured. + [[ $line =~ ^server[[:space:]]+(.+)$ ]] && server="${BASH_REMATCH[1]}" + [[ $line =~ ^datastore[[:space:]]+(.+)$ ]] && datastore="${BASH_REMATCH[1]}" + [[ $line =~ ^username[[:space:]]+(.+)$ ]] && username="${BASH_REMATCH[1]}" + [[ $line =~ ^fingerprint[[:space:]]+(.+)$ ]] && fingerprint="${BASH_REMATCH[1]}" if [[ $line =~ ^[a-zA-Z]+:[[:space:]] && -n "$server" && -n "$datastore" && -n "$username" ]]; then pw_file="/etc/pve/priv/storage/${current}.pw" @@ -299,7 +373,8 @@ hb_collect_pbs_configs() { HB_PBS_REPOS+=("${username}@${server}:${datastore}") HB_PBS_SECRETS+=("$pw_val") HB_PBS_SOURCES+=("proxmox") - current="" server="" datastore="" username="" + HB_PBS_FINGERPRINTS+=("$fingerprint") + current="" server="" datastore="" username="" fingerprint="" fi fi done < /etc/pve/storage.cfg @@ -311,13 +386,14 @@ hb_collect_pbs_configs() { HB_PBS_REPOS+=("${username}@${server}:${datastore}") HB_PBS_SECRETS+=("$pw_val") HB_PBS_SOURCES+=("proxmox") + HB_PBS_FINGERPRINTS+=("$fingerprint") fi fi # Manual configs local manual_cfg="$HB_STATE_DIR/pbs-manual-configs.txt" if [[ -f "$manual_cfg" ]]; then - local line name repo sf + local line name repo sf fp_file while IFS= read -r line; do line="${line%%#*}" line="${line#"${line%%[![:space:]]*}"}" @@ -325,9 +401,11 @@ hb_collect_pbs_configs() { [[ -z "$line" ]] && continue name="${line%%|*}"; repo="${line##*|}" sf="$HB_STATE_DIR/pbs-pass-${name}.txt" + fp_file="$HB_STATE_DIR/pbs-fingerprint-${name}.txt" HB_PBS_NAMES+=("$name"); 
HB_PBS_REPOS+=("$repo") HB_PBS_SECRETS+=("$([[ -f "$sf" ]] && cat "$sf" || echo "")") HB_PBS_SOURCES+=("manual") + HB_PBS_FINGERPRINTS+=("$([[ -f "$fp_file" ]] && cat "$fp_file" || echo "")") done < "$manual_cfg" fi } @@ -395,6 +473,14 @@ hb_select_pbs_repository() { HB_PBS_NAME="${HB_PBS_NAMES[$sel]}" export HB_PBS_REPOSITORY="${HB_PBS_REPOS[$sel]}" HB_PBS_SECRET="${HB_PBS_SECRETS[$sel]}" + # Export the fingerprint so _bk_pbs / _rs_extract_pbs can + # pass it to proxmox-backup-client via PBS_FINGERPRINT. The + # binary otherwise prompts "Are you sure you want to + # continue connecting? (y/n):" — twice in some flows + # (backup + catalog upload) — and silently auto-accepts on + # stdin closure, which is both noisy and an MITM risk on a + # cross-host restore. + export HB_PBS_FINGERPRINT="${HB_PBS_FINGERPRINTS[$sel]:-}" if [[ -z "$HB_PBS_SECRET" ]]; then HB_PBS_SECRET=$(dialog --backtitle "ProxMenux" --title "PBS" \ --insecure --passwordbox \ @@ -407,59 +493,298 @@ hb_select_pbs_repository() { fi } +# ========================================================== +# PBS KEYFILE RECOVERY +# +# `proxmox-backup-client key create` cannot set a KDF passphrase +# non-interactively, so we generate the keyfile with `--kdf none` +# and add our OWN passphrase-based recovery layer on top: +# +# 1. After creating the keyfile, ask the operator for a recovery +# passphrase. Encrypt the keyfile with openssl using that +# passphrase → produces `pbs-key.recovery.enc`. +# 2. On every PBS backup, we upload `pbs-key.recovery.enc` to a +# SEPARATE backup group (`host/proxmenux-keyrecovery-`) +# with NO `--keyfile` flag — so PBS stores it as a regular +# (non-PBS-encrypted) blob. The blob is still protected by +# the operator's passphrase via openssl. +# 3. On a fresh install where the local keyfile is missing, the +# restore flow looks up the recovery group in PBS, downloads +# the blob, asks for the passphrase, decrypts it, and writes +# the keyfile back to its canonical location. 
+# +# So the operator only needs to remember the passphrase. The +# encrypted recovery copy travels with their PBS backups +# automatically; no manual offsite keyfile escrow required. +# ========================================================== + +hb_pbs_encrypt_recovery() { + # Reads passphrase from stdin. AES-256-CBC + PBKDF2 with 600k + # iterations — standard openssl format, decryptable from any + # host with openssl ≥ 1.1.1. + openssl enc -aes-256-cbc -pbkdf2 -iter 600000 -salt \ + -in "$1" -out "$2" -pass stdin 2>/dev/null +} + +hb_pbs_decrypt_recovery() { + openssl enc -d -aes-256-cbc -pbkdf2 -iter 600000 \ + -in "$1" -out "$2" -pass stdin 2>/dev/null +} + +hb_pbs_setup_recovery() { + local key_file="$HB_STATE_DIR/pbs-key.conf" + local recovery_enc="$HB_STATE_DIR/pbs-key.recovery.enc" + + dialog --backtitle "ProxMenux" --title "$(hb_translate "Keyfile recovery setup")" \ + --yesno "$(hb_translate "Set a recovery passphrase for this keyfile? (Strongly recommended)")"$'\n\n'"$(hb_translate "With a recovery passphrase, an encrypted copy of the keyfile is uploaded to PBS with every backup. If you lose this host, you can recover the keyfile on a fresh install using only the passphrase.")"$'\n\n'"$(hb_translate "Without a recovery passphrase, losing the keyfile means the encrypted backups become unrecoverable forever.")" \ + 17 80 || return 1 + + if ! command -v openssl >/dev/null 2>&1; then + dialog --backtitle "ProxMenux" --title "$(hb_translate "Recovery setup failed")" \ + --msgbox "$(hb_translate "openssl is not installed — cannot create recovery copy. 
Install openssl and retry.")" 9 70 + return 1 + fi + + local pass1 pass2 + while true; do + pass1=$(dialog --backtitle "ProxMenux" --title "$(hb_translate "Recovery passphrase")" \ + --insecure --passwordbox "$(hb_translate "Choose a recovery passphrase (write it down somewhere safe):")" \ + "$HB_UI_PASS_H" "$HB_UI_PASS_W" "" 3>&1 1>&2 2>&3) || return 1 + [[ -z "$pass1" ]] && continue + pass2=$(dialog --backtitle "ProxMenux" --title "$(hb_translate "Recovery passphrase")" \ + --insecure --passwordbox "$(hb_translate "Confirm recovery passphrase:")" \ + "$HB_UI_PASS_H" "$HB_UI_PASS_W" "" 3>&1 1>&2 2>&3) || return 1 + [[ "$pass1" == "$pass2" ]] && break + dialog --backtitle "ProxMenux" \ + --msgbox "$(hb_translate "Passphrases do not match. Try again.")" 8 50 + done + + if ! printf '%s' "$pass1" | hb_pbs_encrypt_recovery "$key_file" "$recovery_enc"; then + dialog --backtitle "ProxMenux" --title "$(hb_translate "Recovery setup failed")" \ + --msgbox "$(hb_translate "openssl encryption failed.")" 9 70 + return 1 + fi + chmod 600 "$recovery_enc" + + # Drop an easy-export copy in /root so the operator can scp/USB + # it offsite without spelunking through HB_STATE_DIR. 
+ local export_copy="/root/pbs-key.recovery-$(hostname)-$(date +%Y%m%d).enc" + if cp "$recovery_enc" "$export_copy" 2>/dev/null; then + chmod 600 "$export_copy" + else + export_copy="" + fi + + local success_msg + success_msg="$(hb_translate "Recovery configured.")"$'\n\n' + success_msg+="$(hb_translate "Every PBS backup from now on will also upload the encrypted recovery copy to PBS — automatically, no extra steps from you.")"$'\n\n' + success_msg+="$(hb_translate "If you lose this host: install ProxMenux on a fresh PVE host, point it at the same PBS, and the restore flow will offer to recover the keyfile using your passphrase.")" + if [[ -n "$export_copy" ]]; then + success_msg+=$'\n\n'"$(hb_translate "Offsite copy (optional):") $export_copy" + fi + dialog --backtitle "ProxMenux" --title "$(hb_translate "Recovery ready")" \ + --msgbox "$success_msg" 18 80 + return 0 +} + +# Upload the local recovery .enc to PBS as a separate snapshot +# group. Called from _bk_pbs after the main backup succeeds. +# Skips silently if no recovery copy is present. Returns 0 on +# success or skip, 1 on upload failure. +hb_pbs_upload_recovery_blob() { + local epoch="$1" + local recovery_enc="$HB_STATE_DIR/pbs-key.recovery.enc" + [[ ! -f "$recovery_enc" ]] && return 0 + + # `proxmox-backup-client backup` only accepts archive types + # `pxar` / `img` / `conf` / `log` as the source spec — `.blob` + # is an internal storage format, not a valid input type. The + # recovery file is a small openssl-encrypted blob so we use + # `.conf` (which PBS stores internally as `.conf.blob`). On + # restore we ask for `keyrecovery.conf` (without the .blob + # suffix) and PBS resolves it transparently. + # Note: deliberately NO --keyfile here. The blob is already + # passphrase-encrypted by openssl; we want PBS to store it as + # a plain blob so it can be retrieved without the keyfile. 
+ PBS_PASSWORD="$HB_PBS_SECRET" \ + PBS_FINGERPRINT="${HB_PBS_FINGERPRINT:-}" \ + proxmox-backup-client backup \ + "keyrecovery.conf:$recovery_enc" \ + --repository "$HB_PBS_REPOSITORY" \ + --backup-type host \ + --backup-id "proxmenux-keyrecovery-$(hostname)" \ + --backup-time "$epoch" \ + >/dev/null 2>&1 +} + +# On a fresh install with no local keyfile, try to recover it +# from PBS. Returns 0 if the keyfile was successfully restored +# to $1, 1 if no recovery is possible or the user cancelled. +hb_pbs_try_keyfile_recovery() { + local target_keyfile="$1" + + if ! command -v openssl >/dev/null 2>&1; then + return 1 # silently — main path will surface a clearer error + fi + + # Discover all proxmenux-keyrecovery-* groups in PBS, picking + # the newest snapshot for each group (one row per host). + local -a recovery_entries=() + mapfile -t recovery_entries < <( + PBS_PASSWORD="$HB_PBS_SECRET" \ + PBS_FINGERPRINT="${HB_PBS_FINGERPRINT:-}" \ + proxmox-backup-client snapshot list \ + --repository "$HB_PBS_REPOSITORY" \ + --output-format json 2>/dev/null \ + | jq -r '.[] | select(."backup-type" == "host" and (."backup-id" | startswith("proxmenux-keyrecovery-"))) | "\(."backup-id")|\(."backup-time")"' 2>/dev/null \ + | sort -t'|' -k1,1 -k2,2nr \ + | awk -F'|' '!seen[$1]++' + ) + + if [[ ${#recovery_entries[@]} -eq 0 ]]; then + return 1 # no recovery available — main flow will fail later on + # the actual decrypt, with a clear message + fi + + # Pick the recovery group (auto if one, ask if many) + local picked_id picked_epoch + if [[ ${#recovery_entries[@]} -eq 1 ]]; then + IFS='|' read -r picked_id picked_epoch <<< "${recovery_entries[0]}" + else + local menu=() i=1 + local entry id_part host_part iso_label + for entry in "${recovery_entries[@]}"; do + id_part="${entry%%|*}" + host_part="${id_part#proxmenux-keyrecovery-}" + iso_label=$(date -u -d "@${entry##*|}" '+%Y-%m-%d %H:%M' 2>/dev/null || echo "${entry##*|}") + menu+=("$i" "$host_part — $iso_label UTC") + ((i++)) + 
done + local sel + sel=$(dialog --backtitle "ProxMenux" \ + --title "$(hb_translate "Keyfile recovery — pick source host")" \ + --menu "$(hb_translate "Multiple recovery groups found in PBS. Pick the one that originally created the keyfile:")" \ + 18 78 10 "${menu[@]}" 3>&1 1>&2 2>&3) || return 1 + IFS='|' read -r picked_id picked_epoch <<< "${recovery_entries[$((sel-1))]}" + fi + + local iso + iso=$(date -u -d "@$picked_epoch" '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || echo "$picked_epoch") + local recovery_snapshot="host/${picked_id}/${iso}" + + dialog --backtitle "ProxMenux" --title "$(hb_translate "Keyfile recovery available")" \ + --yesno "$(hb_translate "Local keyfile is missing but a recovery copy was found in PBS.")"$'\n\n'"$(hb_translate "Snapshot:") $recovery_snapshot"$'\n\n'"$(hb_translate "Recover the keyfile using your recovery passphrase?")" \ + 13 78 || return 1 + + # Download the blob once; we may retry passphrase entry without + # re-fetching it. + local tmp_dir + tmp_dir=$(mktemp -d /tmp/_pmnx_keyrec.XXXXXX) || return 1 + # `restore` wants a FILE target (not a directory) for non-pxar + # archives — and we ask for `keyrecovery.conf` (matches the + # name used on upload), which PBS resolves to the underlying + # `keyrecovery.conf.blob` automatically. + if ! 
PBS_PASSWORD="$HB_PBS_SECRET" \ + PBS_FINGERPRINT="${HB_PBS_FINGERPRINT:-}" \ + proxmox-backup-client restore "$recovery_snapshot" "keyrecovery.conf" "$tmp_dir/keyrecovery.enc" \ + --repository "$HB_PBS_REPOSITORY" >/dev/null 2>&1; then + rm -rf "$tmp_dir" + dialog --backtitle "ProxMenux" --title "$(hb_translate "Recovery failed")" \ + --msgbox "$(hb_translate "Could not download recovery blob from PBS.")" 9 70 + return 1 + fi + + local passphrase + while true; do + passphrase=$(dialog --backtitle "ProxMenux" --title "$(hb_translate "Recovery passphrase")" \ + --insecure --passwordbox "$(hb_translate "Enter the recovery passphrase set when the keyfile was created:")" \ + "$HB_UI_PASS_H" "$HB_UI_PASS_W" "" 3>&1 1>&2 2>&3) \ + || { rm -rf "$tmp_dir"; return 1; } + [[ -z "$passphrase" ]] && continue + + mkdir -p "$(dirname "$target_keyfile")" + if printf '%s' "$passphrase" | hb_pbs_decrypt_recovery "$tmp_dir/keyrecovery.enc" "$target_keyfile"; then + chmod 600 "$target_keyfile" + rm -rf "$tmp_dir" + dialog --backtitle "ProxMenux" --title "$(hb_translate "Keyfile recovered")" \ + --msgbox "$(hb_translate "Keyfile recovered successfully.")"$'\n\n'"$(hb_translate "Location:") $target_keyfile"$'\n\n'"$(hb_translate "Restore can now proceed.")" \ + 12 70 + return 0 + fi + # Decryption failed — wrong passphrase (or corrupt blob) + if ! dialog --backtitle "ProxMenux" --title "$(hb_translate "Wrong passphrase")" \ + --yesno "$(hb_translate "Decryption failed. The passphrase may be wrong, or the blob is corrupt. Try again?")" \ + 9 70; then + rm -rf "$tmp_dir" + return 1 + fi + done +} + + hb_ask_pbs_encryption() { local key_file="$HB_STATE_DIR/pbs-key.conf" - local enc_pass_file="$HB_STATE_DIR/pbs-encryption-pass.txt" export HB_PBS_KEYFILE_OPT="" export HB_PBS_ENC_PASS="" + # Wipe any scrollback that might leak above our dialogs — most + # often the terminal title or a stray line from a prior manual + # `proxmox-backup-client` invocation in the same SSH session. 
+ clear + # Reset the window title in case a prior tool set it (the + # `Encryption Key Password:` title that proxmox-backup-client + # sets when prompting interactively, for instance — it sticks + # around in xterm-compatible terminals until overwritten). + printf '\033]0;ProxMenux\007' + dialog --backtitle "ProxMenux" --title "$(hb_translate "Encryption")" \ --yesno "$(hb_translate "Encrypt this backup with a keyfile?")" \ "$HB_UI_YESNO_H" "$HB_UI_YESNO_W" || return 0 if [[ -f "$key_file" ]]; then export HB_PBS_KEYFILE_OPT="--keyfile $key_file" - if [[ -f "$enc_pass_file" ]]; then - HB_PBS_ENC_PASS="$(<"$enc_pass_file")" - export HB_PBS_ENC_PASS - fi msg_ok "$(hb_translate "Using existing encryption key:") $key_file" return 0 fi - # No key — offer to create one - dialog --backtitle "ProxMenux" --title "$(hb_translate "Encryption")" \ - --yesno "$(hb_translate "No encryption key found. Create one now?")" \ - "$HB_UI_YESNO_H" "$HB_UI_YESNO_W" || return 0 - - local pass1 pass2 - while true; do - pass1=$(dialog --backtitle "ProxMenux" --insecure --passwordbox \ - "$(hb_translate "Encryption passphrase (separate from PBS password):")" \ - "$HB_UI_PASS_H" "$HB_UI_PASS_W" "" 3>&1 1>&2 2>&3) || return 0 - pass2=$(dialog --backtitle "ProxMenux" --insecure --passwordbox \ - "$(hb_translate "Confirm encryption passphrase:")" \ - "$HB_UI_PASS_H" "$HB_UI_PASS_W" "" 3>&1 1>&2 2>&3) || return 0 - [[ "$pass1" == "$pass2" ]] && break - dialog --backtitle "ProxMenux" \ - --msgbox "$(hb_translate "Passphrases do not match. Try again.")" 8 50 - done + # No key — create one. We deliberately do NOT prompt for a + # passphrase because `proxmox-backup-client key create` does + # not accept the passphrase via env var or stdin — it reads it + # from a real TTY, which we can't safely provide from a dialog + # flow. 
Instead we generate the keyfile with `--kdf none` (no + # passphrase wrapping) and add our own recovery layer on top + # via hb_pbs_setup_recovery (see the recovery block above). + dialog --backtitle "ProxMenux" --title "$(hb_translate "Create encryption key")" \ + --yesno "$(hb_translate "Generate a new keyfile?")"$'\n\n'"$(hb_translate "Location:") $key_file"$'\n'"$(hb_translate "Protection: chmod 600 (no passphrase on the keyfile itself)")"$'\n\n'"$(hb_translate "Next step will offer a recovery passphrase so the keyfile can be retrieved from PBS if you lose this host.")" \ + 14 80 || return 0 msg_info "$(hb_translate "Creating PBS encryption key...")" - if PBS_ENCRYPTION_PASSWORD="$pass1" \ - proxmox-backup-client key create "$key_file" >/dev/null 2>&1; then - printf '%s' "$pass1" > "$enc_pass_file" - chmod 600 "$enc_pass_file" + mkdir -p "$HB_STATE_DIR" + local create_stderr + create_stderr=$(proxmox-backup-client key create --kdf none "$key_file" &1 >/dev/null) + local create_rc=$? + if [[ $create_rc -eq 0 && -f "$key_file" ]]; then + chmod 600 "$key_file" msg_ok "$(hb_translate "Encryption key created:") $key_file" HB_PBS_KEYFILE_OPT="--keyfile $key_file" - HB_PBS_ENC_PASS="$pass1" - local key_warn_msg - key_warn_msg="$(hb_translate "IMPORTANT: Back up this key file. Without it the backup cannot be restored.")"$'\n\n'"$(hb_translate "Key:") $key_file" - dialog --backtitle "ProxMenux" --msgbox \ - "$key_warn_msg" \ - 10 74 + + # Offer to set up automatic PBS-based recovery for the + # keyfile. The operator can decline if they want to handle + # offsite escrow manually, but the default flow nudges them + # to enable it. + hb_pbs_setup_recovery || true else - msg_error "$(hb_translate "Failed to create encryption key. Backup will proceed without encryption.")" + # Surface the actual error from proxmox-backup-client to the + # operator — silent failures here were the reason the user + # kept seeing `Encryption: Disabled` after entering the + # passphrase. 
Now we show what proxmox-backup-client said. + local err_msg + err_msg="$(hb_translate "Failed to create encryption key. Backup will proceed without encryption.")"$'\n\n' + err_msg+="$(hb_translate "Tool exit code:") $create_rc"$'\n' + err_msg+="$(hb_translate "Tool output:")"$'\n' + err_msg+="${create_stderr:-(empty)}" + dialog --backtitle "ProxMenux" --title "$(hb_translate "Encryption key creation failed")" \ + --msgbox "$err_msg" 14 78 fi } @@ -685,6 +1010,52 @@ hb_prompt_restore_source_dir() { echo "$out" } +# Return the set of scheduler job_ids that currently have a .env on +# disk. Used by hb_is_host_backup_archive to recognize archives +# produced by the scheduler when the filename doesn't follow the +# `hostcfg-` convention. Prints one id per line. +hb_known_scheduler_job_ids() { + local jobs_dir="${PMX_BACKUP_JOBS_DIR:-/var/lib/proxmenux/backup-jobs}" + [[ -d "$jobs_dir" ]] || return 0 + local f + for f in "$jobs_dir"/*.env; do + [[ -f "$f" ]] || continue + basename "$f" .env + done +} + +# Decide whether $path looks like a ProxMenux host backup. Cheap +# checks only — sidecar presence + filename heuristics. We do NOT +# tar-peek here because the picker may face dozens of candidates +# and the user is waiting in front of a dialog; the in-Monitor +# endpoint takes care of peek-based detection where SWR can cache +# the result. Returns 0 on match, non-zero otherwise. +hb_is_host_backup_archive() { + local path="$1" + [[ -z "$path" || ! -f "$path" ]] && return 1 + # 1. Sidecar present → definitive yes. + [[ -f "${path}.proxmenux.json" ]] && return 0 + local name stem + name=$(basename "$path") + # Strip the timestamped suffix; we only need the part BEFORE the + # `-YYYYMMDD_HHMMSS.tar.*` tail. 
+ stem="${name%-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_[0-9][0-9][0-9][0-9][0-9][0-9].tar*}" + # If the strip didn't change anything, the file doesn't follow + # the ProxMenux timestamp convention at all → reject (this kills + # PVE's vzdump-lxc-101-2026_02_24-20_00_56.tar.zst because its + # date uses underscores between Y/M/D, not the YYYYMMDD_ form). + [[ "$stem" == "$name" ]] && return 1 + # 2. hostcfg- prefix → manual or convention-following scheduled. + [[ "$stem" == hostcfg-* ]] && return 0 + # 3. Known scheduler job_id → scheduled. + local jid + while IFS= read -r jid; do + [[ -z "$jid" ]] && continue + [[ "$stem" == "$jid" ]] && return 0 + done < <(hb_known_scheduler_job_ids) + return 1 +} + hb_prompt_local_archive() { local base_dir="$1" local title="${2:-$(hb_translate "Select backup archive")}" @@ -700,17 +1071,39 @@ hb_prompt_local_archive() { | head -200 ) + # Filter the raw find result down to ProxMenux host backups — + # the picker historically showed every .tar* in /var/lib/vz/dump, + # which on a typical Proxmox host means dozens of vzdump-lxc-* + # entries that aren't restorable from this menu. We track the + # drop count so we can tell the operator something was filtered. + local -a kept=() + local hidden=0 row path + for row in "${rows[@]}"; do + path="${row##*|}" + if hb_is_host_backup_archive "$path"; then + kept+=("$row") + else + ((hidden++)) + fi + done + rows=("${kept[@]}") + if [[ ${#rows[@]} -eq 0 ]]; then local no_backups_msg - no_backups_msg="$(hb_translate "No backup archives were found in:") $base_dir"$'\n\n'"$(hb_translate "Select another source path and try again.")" + no_backups_msg="$(hb_translate "No ProxMenux host-backup archives were found in:") $base_dir" + if (( hidden > 0 )); then + no_backups_msg+=$'\n\n'"$(hb_translate "Found") $hidden $(hb_translate "other .tar archive(s) — not ProxMenux host backups (e.g. 
PVE vzdump or unrelated tarballs).")" + else + no_backups_msg+=$'\n\n'"$(hb_translate "Select another source path and try again.")" + fi dialog --backtitle "ProxMenux" \ --title "$(hb_translate "No backups found")" \ --msgbox "$no_backups_msg" \ - 10 78 || true + 12 78 || true return 1 fi - local i=1 row epoch size path date_str size_str label + local i=1 epoch size date_str size_str label for row in "${rows[@]}"; do epoch="${row%%|*}"; row="${row#*|}" size="${row%%|*}"; path="${row#*|}" @@ -721,9 +1114,15 @@ hb_prompt_local_archive() { files+=("$path"); menu+=("$i" "$label"); ((i++)) done + local menu_prompt + menu_prompt="\n$(hb_translate "Detected backups — newest first:")" + if (( hidden > 0 )); then + menu_prompt+=$'\n'"($(hb_translate "Hidden:") $hidden $(hb_translate "non-ProxMenux .tar archive(s) in this path"))" + fi + local choice choice=$(dialog --backtitle "ProxMenux" --title "$title" \ - --menu "\n$(hb_translate "Detected backups — newest first:")" \ + --menu "$menu_prompt" \ "$HB_UI_MENU_H" "$HB_UI_MENU_W" "$HB_UI_MENU_LIST" "${menu[@]}" 3>&1 1>&2 2>&3) || return 1 echo "${files[$((choice-1))]}" @@ -768,3 +1167,365 @@ hb_require_cmd() { fi command -v "$cmd" >/dev/null 2>&1 } + +# ========================================================== +# Compatibility check — compares backup metadata against the +# current host and surfaces hostname / PVE version / kernel / +# storage / network / VMID drift BEFORE the apply menu opens. +# +# After running hb_compat_check, the caller can read: +# HB_COMPAT_SAME_HOST → 1 if backup's hostname matches current +# HB_COMPAT_ANY_FAIL → 1 if at least one FAIL was raised +# HB_COMPAT_ANY_WARN → 1 if at least one WARN was raised +# HB_COMPAT_RESULTS[] → array of "STATUS|category|message" entries +# Use hb_show_compat_report to surface the result and let the user +# decide whether to continue. 
+# ========================================================== +hb_compat_check() { + local staging_root="$1" + HB_COMPAT_RESULTS=() + HB_COMPAT_SAME_HOST=0 + HB_COMPAT_ANY_FAIL=0 + HB_COMPAT_ANY_WARN=0 + + local meta="$staging_root/metadata" + local rootfs="$staging_root/rootfs" + + # --- HOST IDENTITY --- + local bk_hostname="" cur_hostname + if [[ -f "$meta/run_info.env" ]]; then + bk_hostname=$(grep -m1 '^hostname=' "$meta/run_info.env" 2>/dev/null | cut -d= -f2-) + fi + cur_hostname=$(hostname 2>/dev/null || echo "") + if [[ -n "$bk_hostname" ]]; then + if [[ "$bk_hostname" == "$cur_hostname" ]]; then + HB_COMPAT_SAME_HOST=1 + HB_COMPAT_RESULTS+=("PASS|Host|$(hb_translate "Same host:") $bk_hostname") + else + HB_COMPAT_RESULTS+=("WARN|Host|$(hb_translate "Different host. Backup from:") $bk_hostname / $(hb_translate "restoring on:") $cur_hostname") + HB_COMPAT_ANY_WARN=1 + fi + fi + + # --- PVE VERSION --- + # `pveversion.txt` from the backup is `pveversion -v` output, where + # each package is on its own line as `: ` (note the + # SPACE after the colon, not a slash). Live `pveversion` (no flag) + # uses `//` form. Cover both. + local bk_pve="" cur_pve bk_major cur_major + if [[ -f "$meta/pveversion.txt" ]]; then + bk_pve=$(grep -m1 -oE '(^|[[:space:]])pve-manager[[:space:]]*[:/][[:space:]]*[0-9]+\.[0-9]+(\.[0-9]+)?' "$meta/pveversion.txt" 2>/dev/null \ + | grep -oE '[0-9]+\.[0-9]+(\.[0-9]+)?' | head -1) + fi + if command -v pveversion >/dev/null 2>&1; then + cur_pve=$(pveversion 2>/dev/null | grep -m1 -oE 'pve-manager/[0-9]+\.[0-9]+(\.[0-9]+)?' \ + | grep -oE '[0-9]+\.[0-9]+(\.[0-9]+)?' 
| head -1) + fi + if [[ -n "$bk_pve" && -n "$cur_pve" ]]; then + bk_major="${bk_pve%%.*}" + cur_major="${cur_pve%%.*}" + if [[ "$bk_pve" == "$cur_pve" ]]; then + HB_COMPAT_RESULTS+=("PASS|PVE version|$(hb_translate "Identical:") $bk_pve") + elif [[ "$bk_major" == "$cur_major" ]]; then + HB_COMPAT_RESULTS+=("PASS|PVE version|$(hb_translate "Same major series:") $bk_pve → $cur_pve") + else + HB_COMPAT_RESULTS+=("FAIL|PVE version|$(hb_translate "Major version mismatch:") $bk_pve → $cur_pve $(hb_translate "(default paths and packages may have changed)")") + HB_COMPAT_ANY_FAIL=1 + fi + fi + + # --- KERNEL --- + local bk_kernel="" cur_kernel + if [[ -f "$meta/run_info.env" ]]; then + bk_kernel=$(grep -m1 '^kernel=' "$meta/run_info.env" 2>/dev/null | cut -d= -f2-) + fi + cur_kernel=$(uname -r 2>/dev/null) + if [[ -n "$bk_kernel" && -n "$cur_kernel" ]]; then + if [[ "$bk_kernel" == "$cur_kernel" ]]; then + HB_COMPAT_RESULTS+=("PASS|Kernel|$(hb_translate "Identical:") $bk_kernel") + else + local bk_kmaj cur_kmaj + bk_kmaj=$(echo "$bk_kernel" | cut -d. -f1-2) + cur_kmaj=$(echo "$cur_kernel" | cut -d. -f1-2) + if [[ "$bk_kmaj" == "$cur_kmaj" ]]; then + HB_COMPAT_RESULTS+=("PASS|Kernel|$(hb_translate "Same major.minor:") $bk_kernel → $cur_kernel") + else + HB_COMPAT_RESULTS+=("WARN|Kernel|$(hb_translate "Different kernel:") $bk_kernel → $cur_kernel") + HB_COMPAT_ANY_WARN=1 + fi + fi + fi + + # --- STORAGE LAYOUT --- + if [[ -f "$rootfs/etc/pve/storage.cfg" ]] && command -v pvesm >/dev/null 2>&1; then + local -a bk_storages=() missing=() + # `: ` is the storage.cfg block header form. + mapfile -t bk_storages < <(grep -E '^[a-z]+:[[:space:]]+[A-Za-z0-9_.-]+' \ + "$rootfs/etc/pve/storage.cfg" 2>/dev/null | awk '{print $2}' | sort -u) + local s + for s in "${bk_storages[@]}"; do + [[ -z "$s" ]] && continue + if ! 
pvesm status 2>/dev/null | awk 'NR>1 {print $1}' | grep -qx "$s"; then + missing+=("$s") + fi + done + if [[ ${#bk_storages[@]} -eq 0 ]]; then + : # backup didn't include storage.cfg or it was empty + elif [[ ${#missing[@]} -eq 0 ]]; then + HB_COMPAT_RESULTS+=("PASS|Storage|$(hb_translate "All") ${#bk_storages[@]} $(hb_translate "storage(s) from backup exist on target")") + else + HB_COMPAT_RESULTS+=("WARN|Storage|$(hb_translate "Missing on target:") ${missing[*]}") + HB_COMPAT_ANY_WARN=1 + fi + fi + + # --- NETWORK INTERFACES --- + # We only flag physical NICs that the backup references but the + # target doesn't expose. Virtual interfaces (vmbr, bond, tap, veth, + # fwbr/fwln/fwpr, lo, VLAN suffixes) are skipped because they're + # created by the restored configuration itself. + # + # A "missing" NIC needs further triage before we cry FAIL: a + # backup often carries orphan `iface inet manual` lines + # left over from previous hardware that PVE never cleans up. + # Those declarations do nothing if the NIC doesn't exist — they + # don't bring it up, don't bridge it, don't bond it. Only NICs + # that are actually WIRED into the live config (auto-up, in a + # bridge_ports, in a bond_slaves) would lose connectivity if the + # NIC isn't present on the target. + if [[ -f "$rootfs/etc/network/interfaces" ]]; then + local ifaces_file="$rootfs/etc/network/interfaces" + local -a bk_ifaces=() missing_ifaces=() wired_missing=() orphan_missing=() + mapfile -t bk_ifaces < <( + grep -E '^(iface|auto)[[:space:]]' "$ifaces_file" 2>/dev/null \ + | awk '{print $2}' \ + | sort -u \ + | grep -vE '^(lo|vmbr[0-9]+|bond[0-9]+|tap.*|veth.*|fwbr.*|fwln.*|fwpr.*)$' \ + | grep -vE '\.[0-9]+$' # strip VLAN sub-ifaces + ) + local i + for i in "${bk_ifaces[@]}"; do + [[ -z "$i" ]] && continue + if ! ip -o link show "$i" >/dev/null 2>&1; then + missing_ifaces+=("$i") + fi + done + # Classify each missing NIC as wired vs orphan declaration. 
+ for i in "${missing_ifaces[@]}"; do + # Match: `auto `, `bridge-ports ... `, `bridge_ports ... `, + # `bond-slaves ... `, `bond_slaves ... `, `slaves ... ` + if grep -qE "(^auto[[:space:]]+${i}\$|bridge[-_]ports[[:space:]]+.*\b${i}\b|bond[-_]slaves[[:space:]]+.*\b${i}\b|^[[:space:]]*slaves[[:space:]]+.*\b${i}\b)" "$ifaces_file"; then + wired_missing+=("$i") + else + orphan_missing+=("$i") + fi + done + + if [[ ${#bk_ifaces[@]} -eq 0 ]]; then + : # nothing to check + elif [[ ${#missing_ifaces[@]} -eq 0 ]]; then + HB_COMPAT_RESULTS+=("PASS|Network|$(hb_translate "All physical interfaces from backup are present on target")") + else + if [[ ${#wired_missing[@]} -gt 0 ]]; then + HB_COMPAT_RESULTS+=("FAIL|Network|$(hb_translate "Wired NICs in backup missing on target:") ${wired_missing[*]} ($(hb_translate "restoring /etc/network would lose connectivity"))") + HB_COMPAT_ANY_FAIL=1 + fi + if [[ ${#orphan_missing[@]} -gt 0 ]]; then + # Orphan iface declarations — harmless leftover from older + # hardware. Surface as PASS so the operator knows we + # noticed, but don't trigger the FAIL gate. + HB_COMPAT_RESULTS+=("PASS|Network|$(hb_translate "Backup declares unused NICs that are not on this host:") ${orphan_missing[*]} ($(hb_translate "orphan iface lines, no impact on restore"))") + fi + fi + fi + + # --- USER PACKAGES --- + # Compare `apt-mark showmanual` from backup vs current. Any + # package the operator installed deliberately on the source + # host but missing on the target will eventually cause an + # orphan systemd unit or a "command not found" — surface + # those up-front so the operator can decide to install them + # via the "Install missing packages" apply option. 
+ if [[ -f "$meta/packages.manual.list" ]] && command -v apt-mark >/dev/null 2>&1; then + local cur_pkgs_file + cur_pkgs_file=$(mktemp) + apt-mark showmanual 2>/dev/null | sort -u > "$cur_pkgs_file" + local -a missing_pkgs=() + mapfile -t missing_pkgs < <(comm -23 <(sort -u "$meta/packages.manual.list") "$cur_pkgs_file") + rm -f "$cur_pkgs_file" + if [[ ${#missing_pkgs[@]} -eq 0 ]]; then + HB_COMPAT_RESULTS+=("PASS|Packages|$(hb_translate "All user-installed packages from the backup are present on this host")") + else + local list_str + if [[ ${#missing_pkgs[@]} -le 6 ]]; then + list_str="${missing_pkgs[*]}" + else + list_str="${missing_pkgs[*]:0:6}… (+ $((${#missing_pkgs[@]} - 6)) $(hb_translate "more"))" + fi + HB_COMPAT_RESULTS+=("WARN|Packages|${#missing_pkgs[@]} $(hb_translate "user-installed packages from backup are missing here:") $list_str") + HB_COMPAT_ANY_WARN=1 + fi + fi + + # --- VMID OVERLAP --- + # On a same-host restore, overlapping guest IDs are expected (the + # backup snapshotted YOUR live VMs, so of course they match). We + # only flag it when the restore would actually overwrite live + # guest configs — i.e. cross-host AND there's overlap. + # Note: by default the host-backup restore flow does NOT restore + # /etc/pve/nodes/* (it's part of the opt-in cluster_cfg path), but + # if the operator later toggles that path on, this is the warning + # they'd need to have seen. 
+ if [[ -d "$rootfs/etc/pve/nodes" ]] && [[ "$HB_COMPAT_SAME_HOST" != "1" ]]; then + local -a bk_pcts=() bk_qms=() current_pcts=() current_qms=() + [[ -f "$meta/pct-list.txt" ]] && mapfile -t bk_pcts < <(awk '/^[[:space:]]*[0-9]+/{print $1}' "$meta/pct-list.txt") + [[ -f "$meta/qm-list.txt" ]] && mapfile -t bk_qms < <(awk '/^[[:space:]]*[0-9]+/{print $1}' "$meta/qm-list.txt") + command -v pct >/dev/null 2>&1 && mapfile -t current_pcts < <(pct list 2>/dev/null | awk 'NR>1 {print $1}') + command -v qm >/dev/null 2>&1 && mapfile -t current_qms < <(qm list 2>/dev/null | awk 'NR>1 {print $1}') + local pct_overlap=0 qm_overlap=0 id cid + for id in "${bk_pcts[@]}"; do + for cid in "${current_pcts[@]}"; do [[ "$id" == "$cid" ]] && ((pct_overlap++)); done + done + for id in "${bk_qms[@]}"; do + for cid in "${current_qms[@]}"; do [[ "$id" == "$cid" ]] && ((qm_overlap++)); done + done + if (( pct_overlap + qm_overlap > 0 )); then + HB_COMPAT_RESULTS+=("WARN|VM/CT IDs|$(hb_translate "Cross-host restore: guest IDs in backup overlap live IDs on target:") LXC=$pct_overlap, QEMU=$qm_overlap") + HB_COMPAT_ANY_WARN=1 + fi + fi +} + +# Render HB_COMPAT_RESULTS in a dialog. Returns 0 to continue, 1 to +# abort. FAIL forces an explicit second confirmation; WARN shows the +# report and lets the user proceed; an all-PASS report only shows up +# briefly so the user can see it succeeded. 
+hb_show_compat_report() { + local pass=0 warn=0 fail=0 line status rest cat msg + local report="" + for line in "${HB_COMPAT_RESULTS[@]}"; do + status="${line%%|*}"; rest="${line#*|}" + cat="${rest%%|*}"; msg="${rest#*|}" + case "$status" in + PASS) ((pass++)); report+=$' [OK] '"${cat}"$' — '"${msg}"$'\n' ;; + WARN) ((warn++)); report+=$' [WARN] '"${cat}"$' — '"${msg}"$'\n' ;; + FAIL) ((fail++)); report+=$' [FAIL] '"${cat}"$' — '"${msg}"$'\n' ;; + esac + done + + local summary + summary="$(hb_translate "Compatibility check"): " + summary+="${pass} pass, ${warn} warn, ${fail} fail" + + local tmpfile + tmpfile=$(mktemp) + { + printf '%s\n' "$summary" + printf '%s\n\n' "────────────────────────────────────────────────────────────" + printf '%s\n' "$report" + } > "$tmpfile" + + local title + if (( fail > 0 )); then + title="$(hb_translate "Compatibility check — issues detected")" + elif (( warn > 0 )); then + title="$(hb_translate "Compatibility check — review warnings")" + else + title="$(hb_translate "Compatibility check — OK")" + fi + + dialog --backtitle "ProxMenux" --title "$title" \ + --textbox "$tmpfile" 22 86 || true + rm -f "$tmpfile" + + # FAIL means at least one check is a real risk for system integrity + # — force a second yes/no with default NO before letting the user + # press on. + if (( fail > 0 )); then + if ! whiptail --title "$(hb_translate "Continue despite failures?")" \ + --defaultno \ + --yesno "$(hb_translate "The compatibility check raised failures that may break the system after restore.")"$'\n\n'"$(hb_translate "Continue anyway?")" \ + 11 78; then + return 1 + fi + fi + return 0 +} + +# ========================================================== +# Archive sidecar — explicit ProxMenux backup marker. +# +# Drops a small JSON next to a completed archive so the Monitor +# (and any future tooling) can identify it as a ProxMenux host +# backup independently of the filename. 
# The user can rename the
# .tar.zst to whatever they want and the sidecar travels with it
# as long as they keep the same basename pair.
#
# Usage:
#   hb_write_archive_sidecar <archive_path> <kind> [job_id] [profile]
#     kind:    "manual" or "scheduled"
#     job_id:  scheduler job id (empty for manual)
#     profile: "default", "custom", or empty
# Returns 1 only when <archive_path> is empty or not a regular file.
# Otherwise fail-soft: returns 0 even if jq is missing and we have to
# fall back to printf-built JSON; never aborts the surrounding backup.
# ==========================================================

# Escape a string for embedding inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab;
# other control characters are passed through unchanged.
_hb_json_escape() {
    local s="$1"
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
}

hb_write_archive_sidecar() {
    local archive_path="$1"
    local kind="${2:-}"
    local job_id="${3:-}"
    local profile="${4:-}"
    [[ -z "$archive_path" || ! -f "$archive_path" ]] && return 1

    local sidecar="${archive_path}.proxmenux.json"
    local archive_basename hostname_val created_at archive_size
    archive_basename=$(basename "$archive_path")
    hostname_val=$(hostname 2>/dev/null || echo "unknown")
    created_at=$(date -Iseconds 2>/dev/null || date '+%Y-%m-%dT%H:%M:%S%z')
    archive_size=$(stat -c %s "$archive_path" 2>/dev/null || echo 0)
    # The size is emitted as a bare JSON number in both code paths, so it
    # must be strictly numeric.
    [[ "$archive_size" =~ ^[0-9]+$ ]] || archive_size=0

    if command -v jq >/dev/null 2>&1; then
        jq -n \
            --arg kind "$kind" \
            --arg job_id "$job_id" \
            --arg profile "$profile" \
            --arg hostname "$hostname_val" \
            --arg archive "$archive_basename" \
            --arg created_at "$created_at" \
            --argjson size "$archive_size" \
            '{
                schema_version: 1,
                kind: $kind,
                job_id: (if $job_id == "" then null else $job_id end),
                profile: (if $profile == "" then null else $profile end),
                hostname: $hostname,
                archive: $archive,
                created_at: $created_at,
                archive_size: $size
            }' > "$sidecar" 2>/dev/null && return 0
    fi

    # Fallback: jq unavailable (or it failed) — emit JSON by hand. The
    # archive basename is user-controlled (archives may be renamed), so
    # every string goes through _hb_json_escape to keep the output valid.
    {
        printf '{\n'
        printf '  "schema_version": 1,\n'
        printf '  "kind": "%s",\n' "$(_hb_json_escape "$kind")"
        if [[ -n "$job_id" ]]; then
            printf '  "job_id": "%s",\n' "$(_hb_json_escape "$job_id")"
        else
            printf '  "job_id": null,\n'
        fi
        if [[ -n "$profile" ]]; then
            printf '  "profile": "%s",\n' "$(_hb_json_escape "$profile")"
        else
            printf '  "profile": null,\n'
        fi
        printf '  "hostname": "%s",\n' "$(_hb_json_escape "$hostname_val")"
        printf '  "archive": "%s",\n' "$(_hb_json_escape "$archive_basename")"
        printf '  "created_at": "%s",\n' "$(_hb_json_escape "$created_at")"
        printf '  "archive_size": %s\n' "$archive_size"
        printf '}\n'
    } > "$sidecar" 2>/dev/null
    # Deliberately best-effort: a sidecar write failure must not fail the backup.
    return 0
}
diff --git a/scripts/backup_restore/restore/parse_manifest.sh b/scripts/backup_restore/restore/parse_manifest.sh new file mode 100644 index 00000000..3b75c79b --- /dev/null +++ b/scripts/backup_restore/restore/parse_manifest.sh @@ -0,0 +1,98 @@
#!/usr/bin/env bash
# ==========================================================
# ProxMenux backup restore — manifest reader
# ==========================================================
# Reads the JSON manifest from a ProxMenux host backup. Supports:
#   - A loose manifest.json file path
#   - A backup archive (.tar.gz / .tar.zst / .tar)
#   - A pre-extracted backup directory
#
# Emits the manifest's `proxmenux_backup_manifest` sub-object as
# JSON to stdout (i.e. unwraps the top-level key) so downstream
# scripts can use `jq '.source_host'` directly. Exit 0 on success,
# non-zero with a message on stderr if the manifest can't be found.
#
# Usage:
#   parse_manifest.sh <source> [--with-wrapper]
#
#   --with-wrapper  keeps the outer { proxmenux_backup_manifest: { ... } }
#                   wrap (useful when piping to jsonschema validation).
# ==========================================================
set -euo pipefail

# The source path must be the first argument; flags follow it.
SOURCE="${1:-}"
KEEP_WRAPPER=0
shift || true
while [[ $# -gt 0 ]]; do
    case "$1" in
        --with-wrapper) KEEP_WRAPPER=1 ;;
    esac
    shift
done

if [[ -z "$SOURCE" ]]; then
    printf 'parse_manifest: missing source path\n' >&2
    exit 64
fi

# Locate the manifest. Three input shapes:
manifest_json=""
case "$SOURCE" in
    *.tar.gz|*.tgz|*.tar.zst|*.tar)
        # Archive — stream just the manifest entry to stdout. We tolerate
        # the manifest sitting at the root OR under any subdirectory.
        extractor=()
        case "$SOURCE" in
            *.tar.zst)      extractor=(zstd -d --long=27 -c "$SOURCE") ;;
            *.tar.gz|*.tgz) extractor=(gzip -dc "$SOURCE") ;;
            *.tar)          extractor=(cat "$SOURCE") ;;
        esac

        # Pass 1 — pick exactly one member. With --wildcards the pattern
        # '*manifest.json' matches every member whose name merely ends in
        # "manifest.json" (other files inside the backed-up tree,
        # foo-manifest.json, ...), and extracting with the pattern would
        # concatenate all of them. Keep only entries whose basename is
        # exactly manifest.json and prefer the shallowest path.
        member="$("${extractor[@]}" \
            | tar -t -f - --wildcards --quoting-style=literal '*manifest.json' 2>/dev/null \
            | awk -F/ '$NF == "manifest.json" && (best == 0 || NF < best) { best = NF; pick = $0 }
                       END { if (best) print pick }' || true)"

        # Pass 2 — extract that single member by its literal name. `--`
        # stops a hostile member name from being parsed as a tar option.
        # Output is capped at 4 MiB.
        if [[ -z "$member" ]] \
            || ! manifest_json="$("${extractor[@]}" | tar -xO -f - -- "$member" 2>/dev/null | head -c 4194304)" \
            || [[ -z "$manifest_json" ]]; then
            printf 'parse_manifest: no manifest.json found inside %s\n' "$SOURCE" >&2
            exit 65
        fi
        ;;
    *)
        if [[ -f "$SOURCE" ]]; then
            manifest_json="$(cat "$SOURCE")"
        elif [[ -d "$SOURCE" ]]; then
            # Pre-extracted directory — try common paths first, then a search.
            for candidate in "$SOURCE/manifest.json" "$SOURCE/meta/manifest.json"; do
                if [[ -f "$candidate" ]]; then
                    manifest_json="$(cat "$candidate")"; break
                fi
            done
            if [[ -z "$manifest_json" ]]; then
                found="$(find "$SOURCE" -maxdepth 3 -name 'manifest.json' -print -quit 2>/dev/null || true)"
                [[ -n "$found" ]] && manifest_json="$(cat "$found")"
            fi
            if [[ -z "$manifest_json" ]]; then
                printf 'parse_manifest: no manifest.json under %s\n' "$SOURCE" >&2
                exit 65
            fi
        else
            printf 'parse_manifest: %s is neither archive, dir, nor file\n' "$SOURCE" >&2
            exit 66
        fi
        ;;
esac

# Verify it's at least valid JSON before unwrapping.
if ! printf '%s' "$manifest_json" | jq -e 'type == "object"' >/dev/null 2>&1; then
    printf 'parse_manifest: contents are not a JSON object\n' >&2
    exit 67
fi

# Check the wrapper key is present.
if ! printf '%s' "$manifest_json" | jq -e '.proxmenux_backup_manifest' >/dev/null 2>&1; then
    printf 'parse_manifest: missing proxmenux_backup_manifest key (not a ProxMenux manifest?)\n' >&2
    exit 68
fi

if [[ "$KEEP_WRAPPER" == 1 ]]; then
    printf '%s' "$manifest_json"
else
    printf '%s' "$manifest_json" | jq '.proxmenux_backup_manifest'
fi
diff --git a/scripts/backup_restore/restore/preflight_checks.sh b/scripts/backup_restore/restore/preflight_checks.sh new file mode 100644 index 00000000..92568aa3 --- /dev/null +++ b/scripts/backup_restore/restore/preflight_checks.sh @@ -0,0 +1,239 @@
#!/usr/bin/env bash
# ==========================================================
# ProxMenux backup restore — pre-flight compatibility checks
# ==========================================================
# Runs every pre-flight check against the destination host's current
# state and emits a JSON report. The orchestrator (run_restore.sh)
# decides go/no-go based on whether any check has severity=fail.
#
# Severity levels:
#   pass — green, restore can proceed for this dimension
#   warn — proceed but operator should know (e.g. RAM somewhat lower
#          than source, destination PVE older than the backup,
#          NIC MAC absent, PBS role missing but PVE present)
#   fail — must address before proceeding (e.g. CPU arch mismatch,
#          no PVE on the destination, RAM below 80% of the source)
#
# Usage:
#   preflight_checks.sh <source>
#
# Stdout: JSON {checks: [...], summary: {pass: N, warn: N, fail: N}}.
# Exit code: 0 if all checks pass or warn; 1 if any fail.
# ==========================================================
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SOURCE="${1:-}"

if [[ -z "$SOURCE" ]]; then
    printf 'preflight_checks: missing manifest source\n' >&2
    exit 64
fi

manifest="$(bash "$SCRIPT_DIR/parse_manifest.sh" "$SOURCE")"

# Collect "current host" facts up-front so the checks themselves
# stay declarative.
cur_hostname="$(hostname)"
cur_pve_full="$(pveversion 2>/dev/null | head -1 || true)"
cur_pve_ver="$(printf '%s\n' "$cur_pve_full" | sed -nE 's@^pve-manager/([0-9.]+)/.*@\1@p')"
cur_pbs_present=0
command -v proxmox-backup-manager >/dev/null 2>&1 && cur_pbs_present=1
cur_kernel="$(uname -r)"
cur_boot_mode="$([ -d /sys/firmware/efi ] && echo efi || echo bios)"
cur_root_fs="$(findmnt -no FSTYPE / 2>/dev/null || echo unknown)"
cur_cpu_arch="$(uname -m)"
case "$cur_cpu_arch" in x86_64|amd64) cur_cpu_arch=x86_64 ;; aarch64|arm64) cur_cpu_arch=aarch64 ;; esac
cur_memory_kb="$(awk '/^MemTotal:/{print $2; exit}' /proc/meminfo 2>/dev/null || echo 0)"

# Manifest-side facts. Every lookup carries a default: `jq -r` prints the
# literal string "null" for a missing key, and "null" inside an integer
# test is treated as an unset variable name, which aborts under `set -u`.
m_source="$(printf '%s' "$manifest" | jq -c '.source_host')"
m_pve="$(printf '%s' "$m_source" | jq -r '.pve_version // ""')"
m_pbs="$(printf '%s' "$m_source" | jq -r '.pbs_version // ""')"
m_roles="$(printf '%s' "$m_source" | jq -c '.roles // []')"
m_boot_mode="$(printf '%s' "$m_source" | jq -r '.boot_mode // "unknown"')"
m_root_fs="$(printf '%s' "$m_source" | jq -r '.root_fs // ""')"
m_cpu_arch="$(printf '%s' "$m_source" | jq -r '.cpu_arch // "unknown"')"
m_memory_kb="$(printf '%s' "$m_source" | jq -r '.memory_kb // 0')"
m_hostname="$(printf '%s' "$m_source" | jq -r '.hostname // "unknown"')"

# Memory values feed integer comparisons; anything non-numeric disables
# the memory check instead of crashing the script.
[[ "$m_memory_kb" =~ ^[0-9]+$ ]] || m_memory_kb=0
[[ "$cur_memory_kb" =~ ^[0-9]+$ ]] || cur_memory_kb=0

checks='[]'

# Helper to compare dotted version strings as (major,minor,patch) tuples.
# Returns 0 if $1 ≥ $2, 1 otherwise. Missing or non-numeric components
# count as 0; a "-suffix" on a component is ignored.
ver_ge() {
    local -a a b
    local i av bv
    IFS='.' read -ra a <<< "$1"
    IFS='.' read -ra b <<< "$2"
    for i in 0 1 2; do
        av="${a[i]:-0}"; bv="${b[i]:-0}"
        av="${av%%-*}"; bv="${bv%%-*}"    # strip pre-release suffixes
        [[ "$av" =~ ^[0-9]+$ ]] || av=0
        [[ "$bv" =~ ^[0-9]+$ ]] || bv=0
        # 10# forces base 10 so a component such as "08" is not read as octal.
        if (( 10#$av > 10#$bv )); then return 0
        elif (( 10#$av < 10#$bv )); then return 1
        fi
    done
    return 0
}

# Append one check record to $checks.
#   $1 id, $2 severity (pass|warn|fail), $3 message, $4 details JSON (default null)
add_check() {
    local id="$1" severity="$2" message="$3" details="${4:-null}"
    checks="$(jq --argjson acc "$checks" \
        --arg id "$id" --arg sev "$severity" --arg msg "$message" --argjson det "$details" \
        -n '$acc + [{id: $id, severity: $sev, message: $msg, details: $det}]')"
}

# ── Check 1: CPU arch must match ──
if [[ "$cur_cpu_arch" == "$m_cpu_arch" ]]; then
    add_check cpu_arch_match pass "CPU arch matches ($cur_cpu_arch)"
else
    add_check cpu_arch_match fail \
        "Source $m_cpu_arch ≠ destination $cur_cpu_arch — backup is not portable across architectures" \
        "$(jq -n --arg s "$m_cpu_arch" --arg d "$cur_cpu_arch" '{source: $s, destination: $d}')"
fi

# ── Check 2: Boot mode (efi vs bios) ──
if [[ "$cur_boot_mode" == "$m_boot_mode" ]]; then
    add_check boot_mode_match pass "Boot mode matches ($cur_boot_mode)"
else
    add_check boot_mode_match warn \
        "Source $m_boot_mode ≠ destination $cur_boot_mode. Bootloader config from the backup will not apply." \
        "$(jq -n --arg s "$m_boot_mode" --arg d "$cur_boot_mode" '{source: $s, destination: $d}')"
fi

# ── Check 3: Root filesystem family ──
if [[ -n "$m_root_fs" ]]; then
    if [[ "$cur_root_fs" == "$m_root_fs" ]]; then
        add_check root_fs_match pass "Root filesystem matches ($cur_root_fs)"
    else
        add_check root_fs_match warn \
            "Source root_fs=$m_root_fs vs destination $cur_root_fs. Fine for config-only restore, but ZFS-specific paths from the backup may need manual adjustment." \
            "$(jq -n --arg s "$m_root_fs" --arg d "$cur_root_fs" '{source: $s, destination: $d}')"
    fi
fi

# ── Check 4: PVE version ──
if [[ -n "$m_pve" ]]; then
    if [[ -z "$cur_pve_ver" ]]; then
        add_check pve_version fail \
            "Source had PVE $m_pve but destination has no PVE installed" \
            "$(jq -n --arg s "$m_pve" '{source_version: $s, destination_version: null}')"
    elif ver_ge "$cur_pve_ver" "$m_pve"; then
        add_check pve_version pass \
            "Destination PVE $cur_pve_ver ≥ source $m_pve"
    else
        add_check pve_version warn \
            "Destination PVE $cur_pve_ver is OLDER than source $m_pve. New config files may reference fields the older PVE doesn't recognise." \
            "$(jq -n --arg s "$m_pve" --arg d "$cur_pve_ver" '{source: $s, destination: $d}')"
    fi
fi

# ── Check 5: PBS role ──
roles_have_pbs="$(printf '%s' "$m_roles" | jq 'index("pbs") != null')"
if [[ "$roles_have_pbs" == "true" ]]; then
    if [[ "$cur_pbs_present" == 1 ]]; then
        add_check pbs_role pass "Destination has PBS — manifest's pbs role can be restored"
    else
        add_check pbs_role warn \
            "Source had PBS role but destination has no PBS installed. PBS-related configs will be ignored unless you install proxmox-backup-server first."
    fi
fi

# ── Check 6: Memory ──
if [[ "$m_memory_kb" -gt 0 && "$cur_memory_kb" -gt 0 ]]; then
    # 80% rule — destination must have at least 80% of source RAM.
    threshold_kb=$(( m_memory_kb * 80 / 100 ))
    if [[ "$cur_memory_kb" -ge "$m_memory_kb" ]]; then
        add_check memory pass "Destination $(( cur_memory_kb / 1024 ))MB ≥ source $(( m_memory_kb / 1024 ))MB"
    elif [[ "$cur_memory_kb" -ge "$threshold_kb" ]]; then
        add_check memory warn \
            "Destination $(( cur_memory_kb / 1024 ))MB is below source $(( m_memory_kb / 1024 ))MB but within 80% threshold. VMs may need memory limits reduced."
    else
        add_check memory fail \
            "Destination $(( cur_memory_kb / 1024 ))MB is below 80% of source $(( m_memory_kb / 1024 ))MB. VMs from the backup will likely refuse to start." \
            "$(jq -n --argjson s "$m_memory_kb" --argjson d "$cur_memory_kb" '{source_kb: $s, destination_kb: $d}')"
    fi
fi

# ── Check 7: Required by-id disks present ──
required_disks="$(printf '%s' "$manifest" | jq -r '
    [.storage_inventory.zfs_pools[]?.devices_by_id[]?]
    + [.storage_inventory.physical_disks[]?.by_id // empty]
    | unique[]
' | grep -v '^$' || true)"

missing_disks='[]'
present_disks='[]'
while IFS= read -r dev; do
    [[ -z "$dev" ]] && continue
    if [[ -e "/dev/disk/by-id/$dev" ]]; then
        present_disks="$(jq --argjson acc "$present_disks" --arg d "$dev" -n '$acc + [$d]')"
    else
        missing_disks="$(jq --argjson acc "$missing_disks" --arg d "$dev" -n '$acc + [$d]')"
    fi
done <<< "$required_disks"

missing_count="$(printf '%s' "$missing_disks" | jq 'length')"
present_count="$(printf '%s' "$present_disks" | jq 'length')"
total_count=$(( missing_count + present_count ))

if [[ "$total_count" == 0 ]]; then
    add_check disk_inventory pass "Manifest declares no by-id disks (no ZFS pools to import)"
elif [[ "$missing_count" == 0 ]]; then
    add_check disk_inventory pass "All $present_count required by-id disks present" \
        "$(jq -n --argjson p "$present_disks" '{present: $p}')"
else
    add_check disk_inventory warn \
        "$missing_count of $total_count required by-id disks are missing. Affected ZFS pools / storages cannot auto-import." \
        "$(jq -n --argjson m "$missing_disks" --argjson p "$present_disks" '{missing: $m, present: $p}')"
fi

# ── Check 8: NIC MACs present ──
required_macs="$(printf '%s' "$manifest" | jq -r '.hardware_inventory.nic[]?.mac // empty')"
# `|| true`: under errexit + pipefail a missing `ip` binary would otherwise
# terminate the script here without emitting any report. Addresses are
# lower-cased so the comparison below is case-insensitive.
current_macs="$(ip -j link 2>/dev/null | jq -r '.[].address // empty' 2>/dev/null | tr 'A-F' 'a-f' | sort -u || true)"

missing_macs='[]'
matched_macs='[]'
while IFS= read -r mac; do
    [[ -z "$mac" ]] && continue
    if printf '%s\n' "$current_macs" | grep -qFx -- "${mac,,}"; then
        matched_macs="$(jq --argjson acc "$matched_macs" --arg m "$mac" -n '$acc + [$m]')"
    else
        missing_macs="$(jq --argjson acc "$missing_macs" --arg m "$mac" -n '$acc + [$m]')"
    fi
done <<< "$required_macs"

mac_missing="$(printf '%s' "$missing_macs" | jq 'length')"
mac_total=$(( mac_missing + $(printf '%s' "$matched_macs" | jq 'length') ))
if [[ "$mac_total" == 0 ]]; then
    add_check nic_macs pass "Manifest declares no NICs"
elif [[ "$mac_missing" == 0 ]]; then
    add_check nic_macs pass "All $mac_total NIC MACs from source present on destination"
else
    add_check nic_macs warn \
        "$mac_missing of $mac_total source NIC MACs are absent. Bridge memberships referencing those interfaces will need manual remap." \
        "$(jq -n --argjson m "$missing_macs" --argjson p "$matched_macs" '{missing: $m, matched: $p}')"
fi

# ── Check 9: Hostname collision ──
if [[ "$cur_hostname" == "$m_hostname" ]]; then
    add_check hostname pass "Hostname unchanged ($cur_hostname)"
else
    add_check hostname warn \
        "Source hostname '$m_hostname' ≠ destination '$cur_hostname'. Restoring /etc/hostname will change it; this affects PVE cluster identity and some certs." \
        "$(jq -n --arg s "$m_hostname" --arg d "$cur_hostname" '{source: $s, destination: $d}')"
fi

# Compose final report
summary="$(printf '%s' "$checks" | jq '
    reduce .[] as $c ({pass: 0, warn: 0, fail: 0};
        .[$c.severity] += 1)
')"
fail_count="$(printf '%s' "$summary" | jq '.fail')"

jq -n \
    --argjson checks "$checks" \
    --argjson summary "$summary" \
    '{ checks: $checks, summary: $summary }'

# Exit 1 when at least one check failed, 0 otherwise.
if [[ "$fail_count" -eq 0 ]]; then
    exit 0
fi
exit 1
diff --git a/scripts/backup_restore/restore/reinstall_drivers.sh b/scripts/backup_restore/restore/reinstall_drivers.sh new file mode 100644 index 00000000..4a491840 --- /dev/null +++ b/scripts/backup_restore/restore/reinstall_drivers.sh @@ -0,0 +1,124 @@
#!/usr/bin/env bash
# ==========================================================
# ProxMenux backup restore — driver reinstaller
# ==========================================================
# Walks the manifest's proxmenux_installed_components list and
# emits a plan (--dry-run, default) or actually invokes the
# installers (--apply). Each installer is called with:
#
#   bash <installer> --auto-from-manifest \
#        --version <version_at_backup> \
#        --id <component_id>
#
# The installers themselves are responsible for honoring those
# flags and running non-interactively. This script does NOT touch
# the host directly — it only delegates to the existing installers.
#
# Usage:
#   reinstall_drivers.sh <source> [--apply] [--root <dir>]
#
# Output: JSON {plan: [...], applied: [...] (only with --apply)}.
# ==========================================================
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROXMENUX_ROOT="/usr/local/share/proxmenux"   # where the installers live at runtime

SOURCE="${1:-}"
APPLY=0
shift || true
while [[ $# -gt 0 ]]; do
    case "$1" in
        --apply) APPLY=1 ;;
        --root)
            # Without this guard a trailing --root would read an unset $1
            # and die under `set -u` with an unhelpful message.
            if [[ $# -lt 2 ]]; then
                printf 'reinstall_drivers: --root requires a directory argument\n' >&2
                exit 64
            fi
            PROXMENUX_ROOT="$2"
            shift
            ;;
    esac
    shift
done

[[ -z "$SOURCE" ]] && { printf 'reinstall_drivers: missing manifest source\n' >&2; exit 64; }

manifest="$(bash "$SCRIPT_DIR/parse_manifest.sh" "$SOURCE")"

plan='[]'
applied='[]'

# Append one entry to $plan.
#   $1 component id, $2 type, $3 version, $4 installer path relative to
#   PROXMENUX_ROOT (empty → JSON null), $5 action, $6 reason
plan_add() {
    plan="$(jq -n --argjson acc "$plan" \
        --arg id "$1" --arg type "$2" --arg version "$3" \
        --arg ir "$4" --arg action "$5" --arg reason "$6" \
        '$acc + [{
            component_id: $id,
            type: $type,
            version: $version,
            installer: (if $ir == "" then null else $ir end),
            action: $action,
            reason: $reason
        }]')"
}

while IFS= read -r comp; do
    [[ -z "$comp" ]] && continue
    id="$(printf '%s' "$comp" | jq -r '.id')"
    type="$(printf '%s' "$comp" | jq -r '.type // ""')"
    version="$(printf '%s' "$comp" | jq -r '.version_at_backup // ""')"
    installer_rel="$(printf '%s' "$comp" | jq -r '.proxmenux_installer // ""')"

    # Components without an installer are reinstalled manually by the
    # operator after restore (e.g. OCI apps like Tailscale). We still
    # surface them in the plan so the operator has the full list.
    if [[ -z "$installer_rel" ]]; then
        plan_add "$id" "$type" "$version" "" "manual_reinstall_required" \
            "component has no installer mapping — operator must reinstall manually"
        continue
    fi

    # The installer path comes from the manifest inside the backup, i.e.
    # from data this host did not produce. Refuse absolute paths and any
    # ".." segment so --apply can only ever run scripts that live under
    # PROXMENUX_ROOT.
    case "$installer_rel" in
        /*|..|../*|*/..|*/../*)
            plan_add "$id" "$type" "$version" "$installer_rel" "installer_rejected" \
                "installer path escapes the ProxMenux root — refusing to run it"
            continue
            ;;
    esac

    installer_abs="$PROXMENUX_ROOT/$installer_rel"
    if [[ ! -f "$installer_abs" ]]; then
        plan_add "$id" "$type" "$version" "$installer_rel" "installer_missing" \
            "installer script not present on this host — ProxMenux installation incomplete?"
        continue
    fi

    plan_add "$id" "$type" "$version" "$installer_rel" "will_invoke_installer" \
        "bash $installer_rel --auto-from-manifest --version $version --id $id"

    if [[ "$APPLY" == 1 ]]; then
        # The component id is manifest-supplied as well: keep only
        # filename-safe characters so the log cannot be written outside
        # /tmp. Ordinary ids map to the same path as before.
        log_file="/tmp/proxmenux-restore-install-${id//[^A-Za-z0-9._-]/_}.log"
        started_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
        if bash "$installer_abs" --auto-from-manifest --version "$version" --id "$id" \
                >"$log_file" 2>&1; then
            result="ok"; exit_code=0
        else
            exit_code=$?    # must be the first statement so $? is the installer's status
            result="failed"
        fi
        finished_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

        applied="$(jq --argjson acc "$applied" \
            --arg id "$id" --arg result "$result" --argjson ec "$exit_code" \
            --arg s "$started_at" --arg f "$finished_at" --arg log "$log_file" \
            -n '$acc + [{
                component_id: $id,
                result: $result,
                exit_code: $ec,
                started_at: $s,
                finished_at: $f,
                log: $log
            }]')"
    fi
done < <(printf '%s' "$manifest" | jq -c '.proxmenux_installed_components[]?')

if [[ "$APPLY" == 1 ]]; then
    jq -n --argjson plan "$plan" --argjson applied "$applied" '{plan: $plan, applied: $applied}'
else
    jq -n --argjson plan "$plan" '{plan: $plan}'
fi
diff --git a/scripts/backup_restore/restore/remap_network.sh b/scripts/backup_restore/restore/remap_network.sh new file mode 100644 index 00000000..122282d3 --- /dev/null +++ b/scripts/backup_restore/restore/remap_network.sh @@ -0,0 +1,105 @@
#!/usr/bin/env bash
# ==========================================================
# ProxMenux backup restore — NIC remap by MAC
# ==========================================================
# Compares the manifest's NIC list (ifname + MAC + bridges) against
# the destination's current state and produces a remap table.
#
# Decision rules per NIC:
#   - MAC found on the SAME ifname          → keep (no action)
#   - MAC found on a DIFFERENT ifname       → rename or rewrite bridge config
#   - MAC NOT found at all                  → orphan: bridge member needs
#                                             human decision
#   - Destination has a NIC not in manifest → new hardware: no action
#                                             needed for restore, but
#                                             operator may want to add
#                                             to a bridge afterwards
#
# Usage:
#   remap_network.sh <source>
#
# Output: JSON {keep: [...], remap: [...], orphan: [...], new: [...]}.
# ==========================================================
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SOURCE="${1:-}"
if [[ -z "$SOURCE" ]]; then
    printf 'remap_network: missing manifest source\n' >&2
    exit 64
fi

manifest="$(bash "$SCRIPT_DIR/parse_manifest.sh" "$SOURCE")"

# Inventory of the destination's NICs: entries under /sys/class/net that
# are not on the skip list, expose a `device` node and have a non-empty
# address.
dest_nics='[]'
for sys_entry in /sys/class/net/*; do
    nic_name="${sys_entry##*/}"
    case "$nic_name" in
        lo|veth*|tap*|fwln*|fwbr*|fwpr*|vmbr*|bond*) continue ;;
    esac
    [[ -e "$sys_entry/device" ]] || continue
    nic_mac="$(cat "$sys_entry/address" 2>/dev/null || true)"
    [[ -n "$nic_mac" ]] || continue
    dest_nics="$(jq -c --arg n "$nic_name" --arg m "$nic_mac" \
        '. + [{ifname: $n, mac: $m}]' <<< "$dest_nics")"
done

# NICs recorded in the manifest (empty array when the key is absent).
manifest_nics="$(jq -c '.hardware_inventory.nic // []' <<< "$manifest")"

keep='[]'
remap='[]'
orphan='[]'

# Classify every manifest NIC by looking its MAC up on the destination.
while IFS= read -r entry; do
    [[ -n "$entry" ]] || continue
    m_if="$(jq -r '.ifname' <<< "$entry")"
    m_mac="$(jq -r '.mac' <<< "$entry")"
    m_bridges="$(jq -c '.in_bridges // []' <<< "$entry")"

    # First destination NIC carrying the same MAC, if any.
    hit="$(jq -c --arg m "$m_mac" 'first(.[] | select(.mac == $m))' <<< "$dest_nics")"

    if [[ -n "$hit" ]]; then
        d_if="$(jq -r '.ifname' <<< "$hit")"
        if [[ "$d_if" == "$m_if" ]]; then
            # Same MAC, same name → nothing to do.
            keep="$(jq -c --arg if "$m_if" --arg mac "$m_mac" \
                '. + [{ifname: $if, mac: $mac}]' <<< "$keep")"
        else
            # Same MAC under a different name → needs a remap.
            remap="$(jq -c \
                --arg si "$m_if" --arg di "$d_if" --arg mac "$m_mac" --argjson b "$m_bridges" \
                '. + [{
                    source_ifname: $si,
                    destination_ifname: $di,
                    mac: $mac,
                    in_bridges: $b
                }]' <<< "$remap")"
        fi
    else
        # MAC not present on this host → orphan.
        orphan="$(jq -c \
            --arg if "$m_if" --arg mac "$m_mac" --argjson b "$m_bridges" \
            '. + [{
                source_ifname: $if,
                source_mac: $mac,
                in_bridges: $b
            }]' <<< "$orphan")"
    fi
done < <(jq -c '.[]' <<< "$manifest_nics")

# Destination NICs whose MAC the manifest never mentions → new hardware.
manifest_macs="$(jq -r '.[].mac' <<< "$manifest_nics")"
new='[]'
while IFS= read -r candidate; do
    [[ -n "$candidate" ]] || continue
    cand_mac="$(jq -r '.mac' <<< "$candidate")"
    if ! grep -qFx "$cand_mac" <<< "$manifest_macs"; then
        new="$(jq -c --argjson n "$candidate" '. + [$n]' <<< "$new")"
    fi
done < <(jq -c '.[]' <<< "$dest_nics")

jq -n \
    --argjson keep "$keep" \
    --argjson remap "$remap" \
    --argjson orphan "$orphan" \
    --argjson new "$new" \
    '{ keep: $keep, remap: $remap, orphan: $orphan, new: $new }'
diff --git a/scripts/backup_restore/restore/restore_modes.sh b/scripts/backup_restore/restore/restore_modes.sh new file mode 100644 index 00000000..845ef184 --- /dev/null +++ b/scripts/backup_restore/restore/restore_modes.sh @@ -0,0 +1,220 @@
#!/usr/bin/env bash
# ==========================================================
# ProxMenux backup restore — mode presets
# ==========================================================
# Defines the five canonical restore modes.
# Each mode is a
# declarative filter over the manifest:
#
#   full          — restore everything from the backup
#   storage_only  — only PVE storages, ZFS pools, mounts
#   network_only  — only /etc/network, hostname, hosts, firewall
#   base          — full minus network (operator keeps current LAN)
#   custom        — pass-through; the caller decides paths/components
#
# emit_plan takes the mode name and the (unwrapped) manifest JSON as
# arguments and prints a plan JSON to stdout. The plan tells
# run_restore.sh which paths to extract, which components to
# reinstall, and whether to apply storage / network actions.
#
# Plan schema:
#   {
#     mode: "full" | ... ,
#     paths_include: [string, ...],       // paths to extract
#     paths_exclude: [string, ...],       // paths to skip
#     components_include: [string, ...],  // component ids to reinstall
#     storage_apply: bool,
#     network_apply: bool,
#     hostname_apply: bool
#   }
#
# Usage as a library:
#   source restore_modes.sh
#   plan="$(emit_plan full "$manifest_json")"
#
# Usage as a CLI:
#   restore_modes.sh <mode> <source>
#
# Modes consume the manifest's paths_archived list — they don't
# invent paths. Anything you didn't archive can't be restored.
# ==========================================================
# NOTE: these options also take effect in the calling shell when this
# file is sourced as a library.
set -euo pipefail

# Paths that belong to the "network" concern, used by base/network_only
# modes. We match prefixes (e.g. /etc/network covers everything under it).
_NETWORK_PATH_PREFIXES=(
    "/etc/network"
    "/etc/hosts"
    "/etc/hostname"
    "/etc/resolv.conf"
    "/etc/pve/firewall"
    "/etc/pve/nodes"
    "/etc/pve/.members"
)

# Paths that belong to the "storage" concern.
_STORAGE_PATH_PREFIXES=(
    "/etc/pve/storage.cfg"
    "/etc/pve/priv/storage"
    "/etc/fstab"
    "/etc/iscsi"
    "/etc/multipath"
    "/etc/multipath.conf"
    "/etc/zfs"
    "/etc/lvm"
)

# Internal: returns 0 if path $1 equals, or lies underneath, any of the
# prefixes passed as the remaining arguments.
_path_matches_any() {
    local path="$1"; shift
    local prefix
    for prefix in "$@"; do
        case "$path" in
            "$prefix"|"$prefix"/*) return 0 ;;
        esac
    done
    return 1
}

# Internal: emit a JSON array of the paths from paths_archived that pass
# the predicate.
#   $1 = name of a predicate function (called with one path)
#   $2 = manifest JSON
_filter_paths() {
    local predicate="$1" manifest="$2"
    local out='[]'
    while IFS= read -r p; do
        [[ -z "$p" ]] && continue
        if $predicate "$p"; then
            out="$(jq --argjson acc "$out" --arg p "$p" -n '$acc + [$p]')"
        fi
    done < <(printf '%s' "$manifest" | jq -r '.backup_metadata.paths_archived[]?')
    printf '%s' "$out"
}

_is_network_path() { _path_matches_any "$1" "${_NETWORK_PATH_PREFIXES[@]}"; }
_is_storage_path() { _path_matches_any "$1" "${_STORAGE_PATH_PREFIXES[@]}"; }
_is_not_network()  { ! _is_network_path "$1"; }

# Internal: emit the component-ids array for a policy.
# Args:
#   $1 = manifest JSON
#   $2 = "all" | "none"
# Any other policy is an error (status 64) rather than silent empty
# output, which would later surface as an opaque jq --argjson failure.
_components_for_mode() {
    local manifest="$1" policy="$2"
    case "$policy" in
        all)
            printf '%s' "$manifest" | jq '[.proxmenux_installed_components[]?.id]'
            ;;
        none)
            echo '[]'
            ;;
        *)
            printf 'restore_modes: unknown component policy "%s" (expected all|none)\n' "$policy" >&2
            return 64
            ;;
    esac
}

# Public: emit a plan JSON for the requested mode.
#   $1 = mode (full|storage_only|network_only|base|custom)
#   $2 = manifest JSON (the unwrapped proxmenux_backup_manifest object)
# Returns 64 and prints a message on stderr for an unknown mode.
emit_plan() {
    local mode="$1" manifest="$2"

    local include exclude components storage_apply network_apply hostname_apply

    case "$mode" in
        full)
            include="$(printf '%s' "$manifest" | jq '.backup_metadata.paths_archived // []')"
            exclude='[]'
            components="$(_components_for_mode "$manifest" all)"
            storage_apply=true; network_apply=true; hostname_apply=true
            ;;

        storage_only)
            include="$(_filter_paths _is_storage_path "$manifest")"
            exclude='[]'
            components='[]'
            storage_apply=true; network_apply=false; hostname_apply=false
            ;;

        network_only)
            include="$(_filter_paths _is_network_path "$manifest")"
            exclude='[]'
            components='[]'
            storage_apply=false; network_apply=true; hostname_apply=true
            ;;

        base)
            # everything except the network paths
            include="$(_filter_paths _is_not_network "$manifest")"
            # Explicitly enumerate excluded prefixes so the operator sees them
            exclude="$(printf '%s\n' "${_NETWORK_PATH_PREFIXES[@]}" | jq -R . | jq -s .)"
            components="$(_components_for_mode "$manifest" all)"
            storage_apply=true; network_apply=false; hostname_apply=false
            ;;

        custom)
            # Pass-through: include nothing, exclude nothing — caller fills in.
            include='[]'
            exclude='[]'
            components='[]'
            storage_apply=false; network_apply=false; hostname_apply=false
            ;;

        *)
            printf 'restore_modes: unknown mode "%s" (expected full|storage_only|network_only|base|custom)\n' "$mode" >&2
            return 64
            ;;
    esac

    jq -n \
        --arg mode "$mode" \
        --argjson include "$include" \
        --argjson exclude "$exclude" \
        --argjson components "$components" \
        --argjson storage_apply "$storage_apply" \
        --argjson network_apply "$network_apply" \
        --argjson hostname_apply "$hostname_apply" \
        '{
            mode: $mode,
            paths_include: $include,
            paths_exclude: $exclude,
            components_include: $components,
            storage_apply: $storage_apply,
            network_apply: $network_apply,
            hostname_apply: $hostname_apply
        }'
}

# Public: human-friendly label per mode, used by CLI/UI.
mode_label() {
    case "$1" in
        full)          echo "Full restore — apply everything from the backup" ;;
        storage_only)  echo "Storage only — PVE storages, ZFS, fstab, iSCSI, multipath" ;;
        network_only)  echo "Network only — interfaces, hosts, hostname, firewall" ;;
        base)          echo "Base (no network) — everything except network changes" ;;
        custom)        echo "Custom — operator picks paths and components manually" ;;
        *)             echo "Unknown mode" ;;
    esac
}

# CLI mode if called directly
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    MODE="${1:-}"
    SOURCE="${2:-}"
    if [[ -z "$MODE" || -z "$SOURCE" ]]; then
        # Usage goes to stderr via a here-document.
        cat <<EOF >&2
restore_modes.sh — restore mode preset definitions

Usage:
    restore_modes.sh <mode> <source>

Modes:
    full          — $(mode_label full)
    storage_only  — $(mode_label storage_only)
    network_only  — $(mode_label network_only)
    base          — $(mode_label base)
    custom        — $(mode_label custom)
EOF
        exit 64
    fi

    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    manifest="$(bash "$SCRIPT_DIR/parse_manifest.sh" "$SOURCE")"
    emit_plan "$MODE" "$manifest"
fi
diff --git a/scripts/backup_restore/restore/run_restore.sh b/scripts/backup_restore/restore/run_restore.sh new file mode
100644
index 00000000..2d6c32be
--- /dev/null
+++ b/scripts/backup_restore/restore/run_restore.sh
@@ -0,0 +1,219 @@
+#!/usr/bin/env bash
+# ==========================================================
+# ProxMenux backup restore — orchestrator
+# ==========================================================
+# Composes the manifest-aware tools into a single restore
+# workflow:
+#
+#   1. parse manifest         (parse_manifest.sh)
+#   2. resolve mode preset    (restore_modes.sh)
+#   3. preflight checks       (preflight_checks.sh)   ← can fail
+#   4. validate storage       (validate_storage.sh)   ← reports
+#   5. network remap plan     (remap_network.sh)      ← reports
+#   6. driver reinstall plan  (reinstall_drivers.sh)  ← reports
+#
+# By default it runs all of them AS A DRY-RUN and prints the combined
+# report. With --apply it is meant to execute the file extraction
+# (delegated to the existing _rs_apply from backup_host.sh — placeholder
+# for now, see TODO(13D) below) and then runs the driver reinstaller
+# with --apply.
+#
+# Usage:
+#   run_restore.sh <source> [options]
+#
+#   <source>          Manifest source, handed as-is to the helper scripts
+#   --mode <preset>   Restore mode preset (default: full)
+#                     full | storage_only | network_only | base | custom
+#   --json            Machine-readable combined report (default)
+#   --text            Human-friendly summary on stderr + JSON report on stdout
+#   --apply           Actually perform the restore (refuses if preflight fails)
+#
+# Exit status: 64 if <source> is missing, 1 if the preflight report is
+# unusable. Helper-script failures propagate through set -e.
+# ==========================================================
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE="${1:-}"
+FORMAT="json"
+APPLY=0
+MODE="full"
+shift || true
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --json)  FORMAT="json" ;;
+        --text)  FORMAT="text" ;;
+        --apply) APPLY=1 ;;
+        --mode)
+            # Read the value from $2 and consume it only when it exists.
+            # A trailing --mode falls back to "full"; shifting past the
+            # last argument would make shift fail and set -e would then
+            # kill the script without any message.
+            MODE="${2:-full}"
+            if [[ $# -gt 1 ]]; then
+                shift
+            fi
+            ;;
+    esac
+    shift
+done
+
+if [[ -z "$SOURCE" ]]; then
+    printf 'run_restore: usage: %s <source> [--mode <preset>] [--json|--text] [--apply]\n' "$0" >&2
+    exit 64
+fi
+
+# ── Step 1: Parse manifest ──
+manifest="$(bash "$SCRIPT_DIR/parse_manifest.sh" "$SOURCE")"
+
+# ── Step 2: Resolve mode preset (which paths/components/actions apply) ──
+mode_plan="$(bash "$SCRIPT_DIR/restore_modes.sh" "$MODE" "$SOURCE")"
+
+# ── Step 3: Pre-flight checks (gate) ──
+# preflight_checks.sh may exit non-zero (hence || true); what gates
+# --apply is the .summary.fail counter of its JSON report.
+preflight="$(bash "$SCRIPT_DIR/preflight_checks.sh" "$SOURCE" || true)"
+# Fail closed: an empty report or a non-numeric .summary.fail must never
+# be read as "0 failures", otherwise a crashed preflight would let
+# --apply through. jq -e exits non-zero when the filter yields nothing.
+if ! fail_count="$(printf '%s' "$preflight" \
+        | jq -er '.summary.fail | select(type == "number") | floor' 2>/dev/null)"; then
+    printf 'run_restore: preflight produced no usable report (.summary.fail missing or not a number)\n' >&2
+    exit 1
+fi
+
+# ── Step 4: Storage validation ──
+# Only report storage if the mode actually applies storage changes;
+# otherwise we still surface the info but mark it as "not in mode".
+storage_apply_in_mode="$(printf '%s' "$mode_plan" | jq -r '.storage_apply')"
+storage="$(bash "$SCRIPT_DIR/validate_storage.sh" "$SOURCE")"
+
+# ── Step 5: NIC remap plan ──
+network_apply_in_mode="$(printf '%s' "$mode_plan" | jq -r '.network_apply')"
+network="$(bash "$SCRIPT_DIR/remap_network.sh" "$SOURCE")"
+
+# ── Step 6: Driver reinstaller plan ──
+# In modes that don't include components (storage_only, network_only,
+# custom-without-explicit), we narrow the driver plan to nothing.
+components_in_mode="$(printf '%s' "$mode_plan" | jq -c '.components_include')"
+drivers_full_plan="$(bash "$SCRIPT_DIR/reinstall_drivers.sh" "$SOURCE")"
+drivers_plan="$(printf '%s' "$drivers_full_plan" | jq --argjson ids "$components_in_mode" '
+    if ($ids | length) == 0 then
+        .plan |= []
+    else
+        .plan |= map(select(.component_id as $id | $ids | index($id) != null))
+    end
+')"
+
+drivers_applied='null'
+apply_done=false
+abort_reason=""
+
+if [[ "$APPLY" == 1 ]]; then
+    if [[ "$fail_count" -gt 0 ]]; then
+        abort_reason="preflight has $fail_count failing check(s) — refusing --apply"
+    else
+        # Driver reinstall only runs if the selected mode includes components.
+        # Modes that don't (storage_only, network_only) keep drivers untouched.
+        if [[ "$(printf '%s' "$components_in_mode" | jq 'length')" -gt 0 ]]; then
+            drivers_full="$(bash "$SCRIPT_DIR/reinstall_drivers.sh" "$SOURCE" --apply)"
+            # Narrow to components selected by the mode
+            drivers_applied="$(printf '%s' "$drivers_full" | jq --argjson ids "$components_in_mode" '
+                .applied | map(select(.component_id as $id | $ids | index($id) != null))
+            ')"
+        else
+            drivers_applied='[]'
+        fi
+        # TODO(13D): delegate the actual file extraction (paths_include /
+        # paths_exclude from $mode_plan) + storage_apply / network_apply
+        # decisions to backup_host.sh's _rs_apply(). This is the integration
+        # seam between the manifest-aware tooling and the existing extraction
+        # engine.
+        apply_done=true
+    fi
+fi
+
+# Decorate sections that aren't part of the selected mode so the report
+# is honest about what would actually be touched.
+storage_for_report="$(jq -n --argjson s "$storage" --argjson in_mode "$storage_apply_in_mode" \
+    '$s + {in_selected_mode: $in_mode}')"
+network_for_report="$(jq -n --argjson n "$network" --argjson in_mode "$network_apply_in_mode" \
+    '$n + {in_selected_mode: $in_mode}')"
+
+report="$(jq -n \
+    --argjson manifest_source_host "$(printf '%s' "$manifest" | jq '.source_host')" \
+    --argjson mode_plan "$mode_plan" \
+    --argjson preflight "$preflight" \
+    --argjson storage "$storage_for_report" \
+    --argjson network "$network_for_report" \
+    --argjson drivers_plan "$(printf '%s' "$drivers_plan" | jq '.plan')" \
+    --argjson drivers_applied "$drivers_applied" \
+    --argjson apply_done "$apply_done" \
+    --arg abort_reason "$abort_reason" \
+    '{
+        source_host_at_backup: $manifest_source_host,
+        selected_mode: $mode_plan,
+        preflight: $preflight,
+        storage: $storage,
+        network: $network,
+        driver_reinstall: {
+            plan: $drivers_plan,
+            applied: $drivers_applied
+        },
+        applied: $apply_done,
+        abort_reason: (if $abort_reason == "" then null else $abort_reason end)
+    }')"
+
+if [[ "$FORMAT" == "text" ]]; then
+    # Brief human summary on stderr; the JSON still goes to stdout so the
+    # caller can pipe it elsewhere.
+    # Run a jq filter against the combined report (raw output).
+    report_get() { printf '%s' "$report" | jq -r "$1"; }
+    # Label the summary honestly: only a run without --apply is a dry run.
+    if [[ "$APPLY" == 1 ]]; then
+        run_kind="apply"
+    else
+        run_kind="dry-run"
+    fi
+    {
+        printf '─────────────────────────────────────────────\n'
+        printf 'ProxMenux Restore — %s report\n' "$run_kind"
+        printf '─────────────────────────────────────────────\n'
+        printf 'Source host : %s (PVE %s)\n' \
+            "$(report_get '.source_host_at_backup.hostname')" \
+            "$(report_get '.source_host_at_backup.pve_version // "-"')"
+        printf 'Mode : %s — %s paths in, %s components\n' \
+            "$MODE" \
+            "$(report_get '.selected_mode.paths_include | length')" \
+            "$(report_get '.selected_mode.components_include | length')"
+        printf 'Pre-flight : %s pass · %s warn · %s fail\n' \
+            "$(report_get '.preflight.summary.pass')" \
+            "$(report_get '.preflight.summary.warn')" \
+            "$(report_get '.preflight.summary.fail')"
+        printf 'Storage : %s pools / %s LVM VGs / %s PVE storages [in mode: %s]\n' \
+            "$(report_get '.storage.zfs | length')" \
+            "$(report_get '.storage.lvm | length')" \
+            "$(report_get '.storage.pve_storage | length')" \
+            "$(report_get '.storage.in_selected_mode')"
+        printf 'Network : %s keep / %s remap / %s orphan / %s new [in mode: %s]\n' \
+            "$(report_get '.network.keep | length')" \
+            "$(report_get '.network.remap | length')" \
+            "$(report_get '.network.orphan | length')" \
+            "$(report_get '.network.new | length')" \
+            "$(report_get '.network.in_selected_mode')"
+        printf 'Drivers : %s in plan\n' \
+            "$(report_get '.driver_reinstall.plan | length')"
+        if [[ "$APPLY" == 1 ]]; then
+            printf '─── APPLY ───\n'
+            if [[ -n "$abort_reason" ]]; then
+                printf 'ABORTED: %s\n' "$abort_reason"
+            else
+                printf 'Drivers applied: %s\n' \
+                    "$(report_get '.driver_reinstall.applied | length')"
+            fi
+        fi
+        printf '─────────────────────────────────────────────\n'
+    } >&2
+fi
+
+printf '%s\n' "$report"
diff --git a/scripts/backup_restore/restore/validate_storage.sh b/scripts/backup_restore/restore/validate_storage.sh
new file mode 100644
index 00000000..afc8ce6c
--- /dev/null
+++ b/scripts/backup_restore/restore/validate_storage.sh
@@ -0,0 +1,181 @@
+#!/usr/bin/env bash
+# ==========================================================
+# ProxMenux backup restore — storage validation
+# ==========================================================
+# Walks the manifest's storage_inventory and reports per-pool /
+# per-storage whether it can be auto-restored on this host. Builds
+# the "what's safe to import vs needs manual work" picture that
+# the orchestrator turns into actionable steps.
+#
+# Usage:
+#   validate_storage.sh <source>
+#
+# Output: JSON {zfs: [...], lvm: [...], pve_storage: [...]} with a
+# per-item action and the disks/parameters that drove the decision:
+#   zfs          present | auto_import | partial | manual_required
+#   lvm          present | manual_required
+#   pve_storage  present | will_be_restored (plus a reachability note)
+# ==========================================================
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE="${1:-}"
+if [[ -z "$SOURCE" ]]; then
+    printf 'validate_storage: missing manifest source\n' >&2
+    exit 64
+fi
+
+manifest="$(bash "$SCRIPT_DIR/parse_manifest.sh" "$SOURCE")"
+
+# True when $1 is non-empty and equals, literally, one whole line of the
+# newline-separated list in $2.
+in_list() {
+    [[ -n "$1" ]] && grep -qFx -- "$1" <<< "$2"
+}
+
+# Snapshot live pool / VG names once and match against the snapshot.
+# Piping a producer straight into grep -q is fragile under pipefail:
+# grep -q exits at the first match, the producer can then die from
+# SIGPIPE, and the pipeline reports failure — a false "not found".
+imported_pools="$(zpool list -H -o name 2>/dev/null || true)"
+existing_vgs=""
+if command -v vgs >/dev/null 2>&1; then
+    existing_vgs="$(vgs --noheadings -o vg_name 2>/dev/null | awk 'NF {print $1}' || true)"
+fi
+
+# ── ZFS pools ──
+zfs_report='[]'
+while IFS= read -r pool_json; do
+    [[ -z "$pool_json" ]] && continue
+    name="$(printf '%s' "$pool_json" | jq -r '.name')"
+    needed_devs="$(printf '%s' "$pool_json" | jq -r '.devices_by_id[]?')"
+    present=()
+    missing=()
+    while IFS= read -r dev; do
+        [[ -z "$dev" ]] && continue
+        # Accept bare by-id names as well as absolute paths: the manifest
+        # schema describes these entries as "/dev/disk/by-id paths".
+        case "$dev" in
+            /*) dev_path="$dev" ;;
+            *)  dev_path="/dev/disk/by-id/$dev" ;;
+        esac
+        if [[ -e "$dev_path" ]]; then
+            present+=("$dev")
+        else
+            missing+=("$dev")
+        fi
+    done <<< "$needed_devs"
+
+    if in_list "$name" "$imported_pools"; then
+        # Already imported on this host.
+        action="present"
+    elif [[ ${#present[@]} -gt 0 && ${#missing[@]} -eq 0 ]]; then
+        action="auto_import"
+    elif [[ ${#present[@]} -gt 0 ]]; then
+        action="partial"
+    else
+        # No member disk found — or the manifest listed none at all, in
+        # which case nothing shows that an automatic import would work.
+        action="manual_required"
+    fi
+
+    present_json="$(printf '%s\n' "${present[@]:-}" | jq -R . | jq -s 'map(select(. != ""))')"
+    missing_json="$(printf '%s\n' "${missing[@]:-}" | jq -R . | jq -s 'map(select(. != ""))')"
+
+    zfs_report="$(jq --argjson acc "$zfs_report" \
+        --arg name "$name" \
+        --arg action "$action" \
+        --argjson present "$present_json" \
+        --argjson missing "$missing_json" \
+        -n '$acc + [{
+            name: $name,
+            action: $action,
+            present: $present,
+            missing: $missing
+        }]')"
+done < <(printf '%s' "$manifest" | jq -c '.storage_inventory.zfs_pools[]?')
+
+# ── LVM volume groups ──
+lvm_report='[]'
+while IFS= read -r vg_json; do
+    [[ -z "$vg_json" ]] && continue
+    name="$(printf '%s' "$vg_json" | jq -r '.name')"
+    # Literal whole-line match: VG names may contain '.' and '+', which
+    # would act as regex metacharacters if spliced into a pattern.
+    if in_list "$name" "$existing_vgs"; then
+        action="present"
+    else
+        action="manual_required"
+    fi
+    lvm_report="$(jq --argjson acc "$lvm_report" \
+        --arg name "$name" --arg action "$action" \
+        -n '$acc + [{ name: $name, action: $action }]')"
+done < <(printf '%s' "$manifest" | jq -c '.storage_inventory.lvm.vgs[]?')
+
+# ── PVE storage.cfg entries ──
+# For each storage entry in the manifest we report whether it currently
+# exists in the destination's storage.cfg (no action needed), whether the
+# backing resource is reachable (e.g. NFS server pings), and what kind of
+# follow-up is required if the storage.cfg is being restored.
+pve_report='[]'
+existing_pve_ids='[]'
+if [[ -r /etc/pve/storage.cfg ]]; then
+    existing_pve_ids="$(awk '/^[a-z]+:[[:space:]]+/{print $2}' /etc/pve/storage.cfg | jq -R . | jq -s .)"
+fi
+
+while IFS= read -r st_json; do
+    [[ -z "$st_json" ]] && continue
+    id="$(printf '%s' "$st_json" | jq -r '.id')"
+    type="$(printf '%s' "$st_json" | jq -r '.type')"
+    server="$(printf '%s' "$st_json" | jq -r '.server // ""')"
+    pool="$(printf '%s' "$st_json" | jq -r '.pool // ""')"
+
+    already_present="$(printf '%s' "$existing_pve_ids" | jq -r --arg i "$id" 'any(. == $i)')"
+    reachable_note=""
+
+    case "$type" in
+        nfs|cifs)
+            if [[ -n "$server" ]]; then
+                # "--" keeps a manifest value starting with "-" from being
+                # parsed as a ping option.
+                if ping -c 1 -W 1 -- "$server" >/dev/null 2>&1; then
+                    reachable_note="reachable"
+                else
+                    reachable_note="server_unreachable"
+                fi
+            fi
+            ;;
+        zfspool)
+            # The pool name in storage.cfg is e.g. "rpool/data" — only valid
+            # if the parent pool is imported.
+            parent_pool="${pool%%/*}"
+            if in_list "$parent_pool" "$imported_pools"; then
+                reachable_note="pool_imported"
+            else
+                reachable_note="pool_not_imported"
+            fi
+            ;;
+    esac
+
+    if [[ "$already_present" == "true" ]]; then
+        action="present"
+    else
+        action="will_be_restored"
+    fi
+
+    pve_report="$(jq --argjson acc "$pve_report" \
+        --arg id "$id" --arg type "$type" --arg action "$action" --arg note "$reachable_note" \
+        -n '$acc + [{
+            id: $id,
+            type: $type,
+            action: $action,
+            note: (if $note == "" then null else $note end)
+        }]')"
+done < <(printf '%s' "$manifest" | jq -c '.storage_inventory.pve_storage_cfg[]?')
+
+# Compose
+jq -n \
+    --argjson zfs "$zfs_report" \
+    --argjson lvm "$lvm_report" \
+    --argjson pve "$pve_report" \
+    '{ zfs: $zfs, lvm: $lvm, pve_storage: $pve }'
diff --git a/scripts/backup_restore/run_scheduled_backup.sh b/scripts/backup_restore/run_scheduled_backup.sh
index 73d86e7c..2f9318d3 100644
--- a/scripts/backup_restore/run_scheduled_backup.sh
+++ b/scripts/backup_restore/run_scheduled_backup.sh
@@ -57,7 +57,10 @@ _sb_prune_local() {
         for f in "${files[@]}"; do
             idx=$((idx+1))
             (( idx <= keep_last )) && continue
-            rm -f "$f" || true
+            # Remove the archive AND its sidecar in one
shot — if we + # leave .proxmenux.json files behind, the Monitor would + # show them as broken entries pointing at deleted archives. + rm -f "$f" "${f}.proxmenux.json" || true done fi } @@ -80,6 +83,11 @@ _sb_run_local() { archive_ext="tar.gz" fi + # Drop a sidecar JSON next to the archive — explicit marker the + # Monitor can use to identify this as a scheduled host backup, + # independent of any future rename of the archive. + hb_write_archive_sidecar "$archive" "scheduled" "$job_id" "${PROFILE:-}" || true + _sb_prune_local "$job_id" "$dest_dir" "$archive_ext" echo "LOCAL_ARCHIVE=$archive" return 0 diff --git a/scripts/backup_restore/schema/manifest.schema.json b/scripts/backup_restore/schema/manifest.schema.json new file mode 100644 index 00000000..51583ae7 --- /dev/null +++ b/scripts/backup_restore/schema/manifest.schema.json @@ -0,0 +1,305 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://proxmenux.dev/schemas/host-backup-manifest/v1.json", + "title": "ProxMenux Host Backup Manifest", + "description": "Recipe-style manifest embedded inside every ProxMenux host backup. Generated at backup time and consumed by the restore wizard for compatibility checks, driver reinstallation via ProxMenux installers, and hardware/storage validation before applying any file. Schema designed against the .55 fixture (AMD homelab, LVM, vfio passthrough, NFS+CIFS) and the .89 production target (NVIDIA Quadro + ZFS).", + "type": "object", + "required": ["proxmenux_backup_manifest"], + "additionalProperties": false, + "properties": { + "proxmenux_backup_manifest": { + "type": "object", + "required": ["schema_version", "created_at", "source_host"], + "additionalProperties": false, + "properties": { + "schema_version": { + "type": "integer", + "minimum": 1, + "description": "Manifest schema version. Bumped on breaking field changes." 
+ }, + "created_at": { + "type": "string", + "format": "date-time", + "description": "ISO8601 timestamp of backup creation." + }, + "created_by": { + "type": "string", + "description": "Tool + version, e.g. 'proxmenux-host-backup/1.3.0'." + }, + "source_host": { "$ref": "#/$defs/source_host" }, + "hardware_inventory": { "$ref": "#/$defs/hardware_inventory" }, + "storage_inventory": { "$ref": "#/$defs/storage_inventory" }, + "proxmenux_installed_components": { + "type": "array", + "items": { "$ref": "#/$defs/installed_component" } + }, + "kernel_params": { "$ref": "#/$defs/kernel_params" }, + "vms_lxcs_at_backup": { "$ref": "#/$defs/guests_inventory" }, + "backup_metadata": { "$ref": "#/$defs/backup_metadata" } + } + } + }, + + "$defs": { + "source_host": { + "type": "object", + "required": ["hostname", "kernel", "roles", "boot_mode", "cpu_arch", "memory_kb"], + "additionalProperties": false, + "properties": { + "hostname": { "type": "string", "minLength": 1 }, + "pve_version": { "type": ["string", "null"], "description": "Short version like 9.2.2." }, + "pve_version_full": { "type": ["string", "null"], "description": "Full pveversion line." }, + "pbs_version": { "type": ["string", "null"], "description": "Present only if proxmox-backup-server is installed." }, + "roles": { + "type": "array", + "items": { "enum": ["pve", "pbs"] }, + "minItems": 1, + "uniqueItems": true, + "description": "Domain roles present on the host. Drives recipe selection at restore." 
+ }, + "kernel": { "type": "string" }, + "boot_mode": { "enum": ["efi", "bios"] }, + "root_fs": { "enum": ["ext4", "xfs", "btrfs", "zfs"] }, + "cpu_model": { "type": "string" }, + "cpu_arch": { "enum": ["x86_64", "aarch64"] }, + "memory_kb": { "type": "integer", "minimum": 0 }, + "subscription_status": { "type": ["string", "null"] } + } + }, + + "hardware_inventory": { + "type": "object", + "additionalProperties": false, + "properties": { + "gpu": { "type": "array", "items": { "$ref": "#/$defs/gpu" } }, + "tpu": { "type": "array", "items": { "$ref": "#/$defs/tpu" } }, + "nic": { "type": "array", "items": { "$ref": "#/$defs/nic" } }, + "wireless": { "type": "array", "items": { "$ref": "#/$defs/wireless" } } + } + }, + + "gpu": { + "type": "object", + "required": ["vendor", "pci_address", "pci_id"], + "additionalProperties": false, + "properties": { + "vendor": { "enum": ["AMD", "NVIDIA", "Intel", "Other"] }, + "model": { "type": "string" }, + "pci_address": { "type": "string", "pattern": "^[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\\.[0-9a-f]$" }, + "pci_id": { "type": "string", "pattern": "^[0-9a-f]{4}:[0-9a-f]{4}$" }, + "kernel_driver": { "type": ["string", "null"] }, + "passthrough_eligible": { "type": "boolean" }, + "proxmenux_installer": { "type": ["string", "null"], "description": "Relative path to the installer script, e.g. 'scripts/gpu_tpu/nvidia_installer.sh'. Null if no installer is applicable (AMD iGPU, etc.)." 
}, + "installed_driver_version": { "type": ["string", "null"] } + } + }, + + "tpu": { + "type": "object", + "required": ["vendor"], + "additionalProperties": false, + "properties": { + "vendor": { "enum": ["Google", "Other"] }, + "model": { "type": "string" }, + "bus": { "enum": ["PCIe", "USB"] }, + "pci_address": { "type": ["string", "null"] }, + "proxmenux_installer": { "type": ["string", "null"] }, + "installed_version": { "type": ["string", "null"] } + } + }, + + "nic": { + "type": "object", + "required": ["ifname", "mac"], + "additionalProperties": false, + "properties": { + "ifname": { "type": "string" }, + "mac": { "type": "string", "pattern": "^[0-9a-f:]{17}$" }, + "kernel_driver": { "type": ["string", "null"] }, + "in_bridges": { "type": "array", "items": { "type": "string" } }, + "operstate": { "enum": ["UP", "DOWN", "UNKNOWN"] } + } + }, + + "wireless": { + "type": "object", + "required": ["ifname", "mac"], + "additionalProperties": false, + "properties": { + "ifname": { "type": "string" }, + "mac": { "type": "string" } + } + }, + + "storage_inventory": { + "type": "object", + "additionalProperties": false, + "properties": { + "zfs_pools": { "type": "array", "items": { "$ref": "#/$defs/zfs_pool" } }, + "lvm": { "$ref": "#/$defs/lvm_state" }, + "physical_disks": { "type": "array", "items": { "$ref": "#/$defs/physical_disk" } }, + "pve_storage_cfg": { "type": "array", "items": { "$ref": "#/$defs/storage_entry" } }, + "mounts": { "type": "array", "items": { "$ref": "#/$defs/mount" } } + } + }, + + "zfs_pool": { + "type": "object", + "required": ["name"], + "additionalProperties": false, + "properties": { + "name": { "type": "string" }, + "type": { "enum": ["mirror", "raidz1", "raidz2", "raidz3", "stripe", "single"] }, + "devices_by_id": { "type": "array", "items": { "type": "string" }, "description": "Disks referenced by stable /dev/disk/by-id paths. Restore validates these exist before importing." 
}, + "mountpoint": { "type": "string" }, + "compression": { "type": "string" }, + "size_bytes": { "type": "integer" }, + "health": { "enum": ["ONLINE", "DEGRADED", "FAULTED", "OFFLINE", "UNAVAIL", "REMOVED"] } + } + }, + + "lvm_state": { + "type": "object", + "additionalProperties": false, + "properties": { + "vgs": { "type": "array", "items": { "$ref": "#/$defs/lvm_vg" } } + } + }, + + "lvm_vg": { + "type": "object", + "required": ["name"], + "additionalProperties": false, + "properties": { + "name": { "type": "string" }, + "size_bytes": { "type": "integer" }, + "thin_pools": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "lv_name": { "type": "string" }, + "size_bytes": { "type": "integer" } + } + } + } + } + }, + + "physical_disk": { + "type": "object", + "required": ["name"], + "additionalProperties": false, + "properties": { + "name": { "type": "string" }, + "model": { "type": ["string", "null"] }, + "size_bytes": { "type": "integer" }, + "by_id": { "type": ["string", "null"], "description": "Stable /dev/disk/by-id symlink target if available. Used to detect the same disk on different controller layouts." } + } + }, + + "storage_entry": { + "type": "object", + "required": ["id", "type"], + "description": "additionalProperties is intentionally TRUE here. 
PVE's storage.cfg accepts driver-specific options (sparse, krbd, fingerprint, datacenter, fs-name, nodes, prune-backups, …) that vary across releases and we don't want to play whack-a-mole on the schema every time someone uses a flag we haven't seen.", + "properties": { + "id": { "type": "string" }, + "type": { "enum": ["dir", "lvm", "lvmthin", "zfs", "zfspool", "btrfs", "nfs", "cifs", "iscsi", "rbd", "cephfs", "pbs", "esxi"] }, + "path": { "type": ["string", "null"] }, + "server": { "type": ["string", "null"] }, + "export": { "type": ["string", "null"] }, + "share": { "type": ["string", "null"] }, + "thinpool": { "type": ["string", "null"] }, + "vgname": { "type": ["string", "null"] }, + "pool": { "type": ["string", "null"] }, + "content": { "type": "array", "items": { "type": "string" } } + } + }, + + "mount": { + "type": "object", + "required": ["target", "source", "fstype"], + "additionalProperties": false, + "properties": { + "target": { "type": "string" }, + "source": { "type": "string" }, + "fstype": { "type": "string" }, + "options": { "type": ["string", "null"] } + } + }, + + "installed_component": { + "type": "object", + "required": ["id"], + "additionalProperties": false, + "properties": { + "id": { "type": "string", "description": "Stable identifier matching ProxMenux managed_installs registry." }, + "type": { "type": "string", "description": "Type from registry, e.g. nvidia_xfree86, coral, oci_app, tailscale." }, + "version_at_backup": { "type": ["string", "null"] }, + "proxmenux_installer": { "type": ["string", "null"], "description": "Installer script relative to scripts/, called with --auto-from-manifest at restore." }, + "applied_settings": { "type": "array", "items": { "type": "string" }, "description": "Named tweaks applied (log2ram, memory_settings, etc.). Reapplied via post-install scripts at restore." 
} + } + }, + + "kernel_params": { + "type": "object", + "additionalProperties": false, + "properties": { + "cmdline_extra": { "type": "array", "items": { "type": "string" }, "description": "Extra entries in /proc/cmdline beyond the default PVE set (BOOT_IMAGE, root, ro). Used to detect iommu, hugepages, custom GRUB tunings." }, + "modules_loaded_at_boot": { "type": "array", "items": { "type": "string" }, "description": "Contents of /etc/modules." }, + "modprobe_d_files": { "type": "array", "items": { "type": "string" }, "description": "Paths under /etc/modprobe.d/ that contain user-defined options/blacklists." } + } + }, + + "guests_inventory": { + "type": "object", + "additionalProperties": false, + "properties": { + "vms": { + "type": "array", + "items": { + "type": "object", + "required": ["vmid", "name"], + "additionalProperties": false, + "properties": { + "vmid": { "type": "integer", "minimum": 100, "maximum": 999999999 }, + "name": { "type": "string" }, + "memory_mb": { "type": "integer" }, + "bootdisk_gb": { "type": "number" }, + "status": { "enum": ["running", "stopped", "paused"] }, + "config_file": { "type": ["string", "null"], "description": "Relative path inside the backup archive where the .conf was captured." 
} + } + } + }, + "lxcs": { + "type": "array", + "items": { + "type": "object", + "required": ["vmid", "name"], + "additionalProperties": false, + "properties": { + "vmid": { "type": "integer", "minimum": 100, "maximum": 999999999 }, + "name": { "type": "string" }, + "status": { "enum": ["running", "stopped"] }, + "config_file": { "type": ["string", "null"] } + } + } + } + } + }, + + "backup_metadata": { + "type": "object", + "additionalProperties": false, + "properties": { + "encrypted": { "type": "boolean" }, + "encryption_format": { "type": ["string", "null"], "enum": [null, "age", "gpg"] }, + "compression": { "enum": ["none", "gzip", "zstd"] }, + "paths_archived": { "type": ["array", "null"], "items": { "type": "string" }, "description": "Snapshot of every host path that ended up inside the archive. Used by restore to drive selective category extraction." }, + "sha256_archive": { "type": ["string", "null"], "pattern": "^[0-9a-f]{64}$" }, + "size_bytes": { "type": ["integer", "null"] } + } + } + } +} diff --git a/scripts/menus/main_menu.sh b/scripts/menus/main_menu.sh index f26fdb2f..2e1649c0 100644 --- a/scripts/menus/main_menu.sh +++ b/scripts/menus/main_menu.sh @@ -94,7 +94,7 @@ show_menu() { dialog --clear \ --backtitle "ProxMenux" \ --title "$(translate "$menu_title")" \ - --menu "\n$(translate "Select an option:")" 20 70 11 \ + --menu "\n$(translate "Select an option:")" 21 70 12 \ 1 "$(translate "Settings post-install Proxmox")" \ 2 "$(translate "Hardware: GPUs and Coral-TPU")" \ 3 "$(translate "Create VM from template or script")" \ @@ -104,6 +104,7 @@ show_menu() { 7 "$(translate "Network Management")" \ 8 "$(translate "Security")" \ 9 "$(translate "Utilities and Tools")" \ + b "$(translate "Host Backup & Restore")" \ h "$(translate "Help and Info Commands")" \ s "$(translate "Settings")" \ 0 "$(translate "Exit")" 2>"$TEMP_FILE" @@ -129,6 +130,7 @@ show_menu() { 7) exec bash "$LOCAL_SCRIPTS/menus/network_menu.sh" ;; 8) exec bash 
"$LOCAL_SCRIPTS/menus/security_menu.sh" ;; 9) exec bash "$LOCAL_SCRIPTS/menus/utilities_menu.sh" ;; + b) bash "$LOCAL_SCRIPTS/backup_restore/backup_host.sh" ;; h) bash "$LOCAL_SCRIPTS/help_info_menu.sh" ;; s) exec bash "$LOCAL_SCRIPTS/menus/config_menu.sh" ;; 0) clear; msg_ok "$(translate "Thank you for using ProxMenux. Goodbye!")"; rm -f "$TEMP_FILE"; exit 0 ;;