From c37f96db42ff92f67e2dd12a891136ebdf5240fc Mon Sep 17 00:00:00 2001 From: Thierry Laurion Date: Fri, 6 Mar 2026 13:50:32 -0500 Subject: [PATCH 1/2] functions: add full stack tracing when doing TRACE_FUNC when DEBUG is enabled Signed-off-by: Thierry Laurion --- initrd/etc/functions | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/initrd/etc/functions b/initrd/etc/functions index 125ca51e4..30f19c4a3 100644 --- a/initrd/etc/functions +++ b/initrd/etc/functions @@ -1,5 +1,21 @@ #!/bin/bash +# Maintain a cross-script trace stack. When a script sources /etc/functions, +# a "main($0:0)" entry is appended to TRACE_STACK; the variable is exported so +# it is inherited by child processes and survives exec. TRACE_FUNC prepends this +# stack to the normal function call stack, giving a full picture from init to +# the current point (even across multiple scripts). +# The current script is added only once, so sourcing this file several times +# from the same script does not repeat its entry. 
+case "${TRACE_STACK}" in +*"main($0:"*) + ;; +*) + TRACE_STACK="${TRACE_STACK:+$TRACE_STACK -> }main($0:0)" + export TRACE_STACK + ;; +esac + # ------- Start of functions coming from /etc/ash_functions die() { @@ -680,8 +696,12 @@ TRACE_FUNC() { # Append the direct caller (without extra " -> " at the end) stack_trace+="${FUNCNAME[1]}(${BASH_SOURCE[1]}:${BASH_LINENO[0]})" - # Print the final trace output - TRACE "${stack_trace}" + # Print the final trace output, including any inherited script-level stack + if [ -n "$TRACE_STACK" ]; then + TRACE "$TRACE_STACK -> $stack_trace" + else + TRACE "${stack_trace}" + fi } # Show the entire current call stack in debug output - useful if a catastrophic From 9d03303fd9d09cbac4a12af146cfdba8f66d712d Mon Sep 17 00:00:00 2001 From: Thierry Laurion Date: Fri, 6 Mar 2026 15:48:30 -0500 Subject: [PATCH 2/2] root-hashes: simplify root detection and unsupported-layout UX Compared to HEAD^, this commit updates initrd root-hash probing in: - initrd/bin/root-hashes-gui.sh - initrd/etc/functions Behavior expected to work: - Root-hash create/verify flow on latest Ubuntu, Debian, and PureOS under KVM. - LUKS/LVM root probing based on mountability + expected root directory checks. - Clear unsupported-layout whiptail guidance for unsupported filesystem/layout combinations. Current status and non-goals: - Fedora and QubesOS are untested in this change set. - QubesOS on coreboot q35 with Heads still does not support qemu/kvm; no regression is implied. 
Signed-off-by: Thierry Laurion --- initrd/bin/root-hashes-gui.sh | 157 +++++++++++++++++++++++++++++----- initrd/etc/functions | 29 ++++++- 2 files changed, 163 insertions(+), 23 deletions(-) diff --git a/initrd/bin/root-hashes-gui.sh b/initrd/bin/root-hashes-gui.sh index 67881f554..de645c10e 100755 --- a/initrd/bin/root-hashes-gui.sh +++ b/initrd/bin/root-hashes-gui.sh @@ -5,6 +5,8 @@ set -e -o pipefail CONFIG_ROOT_DIRLIST="bin boot lib sbin usr" HASH_FILE="/boot/kexec_root_hashes.txt" ROOT_MOUNT="/root" +ROOT_DETECT_UNSUPPORTED_REASON="" +ROOT_SUPPORTED_LAYOUT_MSG="Filesystem support in this build:\n- ext4 (ext2/ext3 compatible)\n- xfs\n\nSupported root layouts:\n- LUKS + ext4/ext3/ext2 or xfs\n- LUKS+LVM + ext4/ext3/ext2 or xfs\n\nNot supported:\n- btrfs" . /etc/functions . /etc/gui_functions @@ -12,8 +14,20 @@ ROOT_MOUNT="/root" export CONFIG_ROOT_DIRLIST_PRETTY=$(echo $CONFIG_ROOT_DIRLIST | sed -e 's/^/\//;s/ / \//g') +show_unsupported_root_layout_and_die() { + local ACTION="$1" + + whiptail_error --title 'ERROR: Unsupported Root Layout' \ + --msgbox "$ROOT_DETECT_UNSUPPORTED_REASON\n\n$ROOT_SUPPORTED_LAYOUT_MSG\n\nTry a supported root layout,\nor do not use root hashing,\nthen rerun $ACTION." 0 80 + die "$ROOT_DETECT_UNSUPPORTED_REASON" +} + update_root_checksums() { + TRACE_FUNC if ! 
detect_root_device; then + if [ -n "$ROOT_DETECT_UNSUPPORTED_REASON" ]; then + show_unsupported_root_layout_and_die "root hash update" + fi whiptail_error --title 'ERROR: No Valid Root Disk Found' \ --msgbox "No Valid Root Disk Found" 0 80 die "No Valid Root Disk Found" @@ -31,6 +45,7 @@ update_root_checksums() { mount -o rw,remount /boot fi + DEBUG "calculating hashes for $CONFIG_ROOT_DIRLIST_PRETTY on $ROOT_MOUNT" echo "+++ Calculating hashes for all files in $CONFIG_ROOT_DIRLIST_PRETTY " # Intentional wordsplit # shellcheck disable=SC2086 @@ -47,7 +62,12 @@ update_root_checksums() { unmount_root_device } check_root_checksums() { + TRACE_FUNC + DEBUG "verifying existing hash file for $CONFIG_ROOT_DIRLIST_PRETTY" if ! detect_root_device; then + if [ -n "$ROOT_DETECT_UNSUPPORTED_REASON" ]; then + show_unsupported_root_layout_and_die "root hash verification" + fi whiptail_error --title 'ERROR: No Valid Root Disk Found' \ --msgbox "No Valid Root Disk Found" 0 80 die "No Valid Root Disk Found" @@ -74,6 +94,7 @@ check_root_checksums() { update_root_checksums return 0 else + DEBUG "Root hash file not created (user declined)" exit 1 fi fi @@ -124,6 +145,7 @@ check_root_checksums() { return 0 else + DEBUG "Signatures not updated (user declined after new-files warning)" return 1 fi fi @@ -154,6 +176,7 @@ check_root_checksums() { update_root_checksums return 0 else + DEBUG "Signatures not updated (user declined after hash-check failure)" return 1 fi fi @@ -164,21 +187,69 @@ check_root_checksums() { open_block_device_lvm() { TRACE_FUNC local VG="$1" + local LV MAPPER_VG MAPPER_LV name lvpath FIRST_LV_PREFERRED FIRST_LV_FALLBACK if ! lvm vgchange -ay "$VG"; then DEBUG "Can't open LVM VG: $VG" return 1 fi - # Use the LV 'root'. This is the default name used by Qubes. There's no - # way to configure this at the moment. - if ! 
[ -e "/dev/mapper/$VG-root" ]; then - DEBUG "LVM volume group does not have 'root' logical volume" + # Prefer an LV named 'root' (used by Qubes), but fall back to any LV + # in the VG. This ensures Ubuntu-style names (e.g. ubuntu-vg/ubuntu-root) + # also work. + LV="/dev/$VG/root" + if ! [ -e "$LV" ]; then + MAPPER_VG="${VG//-/--}" + LV="/dev/mapper/${MAPPER_VG}-root" + fi + if ! [ -e "$LV" ]; then + FIRST_LV_PREFERRED="" + FIRST_LV_FALLBACK="" + DEBUG "LVM VG $VG has no 'root' LV, enumerating all LVs" + # list LV names and prefer root-like names + for name in $(lvm lvs --noheadings -o lv_name --separator ' ' "$VG" 2>/dev/null); do + # thin pool/metadata and swap-like LVs are not root filesystems + case "$name" in + *pool*|*tmeta*|*tdata*|*tpool*|swap*) + DEBUG "skipping LV name $name (not a root LV candidate)" + continue + ;; + esac + + lvpath="/dev/$VG/$name" + if ! [ -e "$lvpath" ]; then + MAPPER_LV="${name//-/--}" + lvpath="/dev/mapper/${VG//-/--}-${MAPPER_LV}" + fi + if [ -e "$lvpath" ]; then + case "$name" in + root|dom0|dom0-root|qubes_dom0|qubes_dom0-root|*dom0*root*|*root*) + [ -n "$FIRST_LV_PREFERRED" ] || FIRST_LV_PREFERRED="$lvpath" + DEBUG "preferred LV candidate $lvpath (name $name)" + ;; + *) + [ -n "$FIRST_LV_FALLBACK" ] || FIRST_LV_FALLBACK="$lvpath" + ;; + esac + fi + done + + if [ -n "$FIRST_LV_PREFERRED" ]; then + DEBUG "selecting preferred LV $FIRST_LV_PREFERRED in VG $VG" + LV="$FIRST_LV_PREFERRED" + elif [ -n "$FIRST_LV_FALLBACK" ]; then + DEBUG "falling back to first mountable LV $FIRST_LV_FALLBACK in VG $VG" + LV="$FIRST_LV_FALLBACK" + else + LV="" + fi + fi + if ! [ -e "$LV" ]; then + DEBUG "no usable LV found in VG $VG" return 1 fi - - # Use the root LV now - open_block_device_layers "/dev/mapper/$VG-root" + # Use selected LV + open_block_device_layers "$LV" } # Open a LUKS device, then continue looking for more layers. 
@@ -195,6 +266,15 @@ open_block_device_luks() { return 1 fi + # Inform LVM about any new physical volume inside this decrypted container. + # Some distributions (Fedora) require a vgscan before LVM will create nodes + # under /dev/mapper, otherwise our later search won't see the logical + # volumes. This is harmless on systems without lvm installed. + if command -v lvm >/dev/null 2>&1; then + DEBUG "running vgscan to populate /dev/mapper after unlocking LUKS" + lvm vgscan --mknodes >/dev/null 2>&1 || true + fi + open_block_device_layers "/dev/mapper/$LUKSDEV" } @@ -241,14 +321,28 @@ open_block_device_layers() { open_root_device_no_clean_up() { TRACE_FUNC local DEVICE="$1" - local FS_DEVICE + local FS_DEVICE BLKID_OUT # Open LUKS/LVM and get the name of the block device that should contain the # filesystem. If there are no LUKS/LVM layers, FS_DEVICE is just DEVICE. FS_DEVICE="$(open_block_device_layers "$DEVICE")" || return 1 + # Keep detection minimal for initrd: only require blkid to return some + # metadata before mount probing. TYPE is often unavailable in this initrd. + BLKID_OUT="$(blkid "$FS_DEVICE" 2>/dev/null || true)" + DEBUG "blkid output for $FS_DEVICE: $BLKID_OUT" + + # If blkid reports nothing at all, this is likely not a filesystem-bearing + # partition. Skip mount probing to avoid noisy kernel probe logs. + if [ -z "$BLKID_OUT" ]; then + ROOT_DETECT_UNSUPPORTED_REASON="Found partition/layer with no recognizable filesystem metadata." + DEBUG "Skipping $FS_DEVICE: blkid returned no filesystem metadata" + return 1 + fi + # Mount the device if ! mount -o ro "$FS_DEVICE" "$ROOT_MOUNT" &>/dev/null; then + ROOT_DETECT_UNSUPPORTED_REASON="Found partition/layer on $FS_DEVICE but it could not be mounted as root by this root-hash flow." 
DEBUG "Can't mount filesystem on $FS_DEVICE from $DEVICE" return 1 fi @@ -269,14 +363,8 @@ open_root_device_no_clean_up() { close_block_device_lvm() { TRACE_FUNC local VG="$1" - - # We always use the LV 'root' currently - local LV="/dev/mapper/$VG-root" - if [ -e "$LV" ]; then - close_block_device_layers "$LV" - fi - - # The LVM VG might be open even if no 'root' LV exists, still try to close it. + # Deactivate the VG directly. This avoids recursive LV close probing noise + # for LV paths that are not PVs and matches the minimal initrd workflow. lvm vgchange -an "$VG" || \ DEBUG "Can't close LVM VG: $VG" } @@ -325,7 +413,7 @@ close_block_device_layers() { open_root_device() { TRACE_FUNC if ! open_root_device_no_clean_up "$1"; then - unmount_root_device + close_root_device "$1" return 1 fi @@ -360,20 +448,24 @@ detect_root_device() fi # Ensure nothing is opened/mounted unmount_root_device + ROOT_DETECT_UNSUPPORTED_REASON="" # check $CONFIG_ROOT_DEV if set/valid - if [ -e "$CONFIG_ROOT_DEV" ] && open_root_device "$CONFIG_ROOT_DEV"; then + # run open_root_device with fd10 closed so external tools don't inherit it + if [ -e "$CONFIG_ROOT_DEV" ] && open_root_device "$CONFIG_ROOT_DEV" 10<&-; then return 0 fi # generate list of possible boot devices fdisk -l 2>/dev/null | grep "Disk /dev/" | cut -f2 -d " " | cut -f1 -d ":" > /tmp/disklist + DEBUG "detect_root_device: initial disklist=$(cat /tmp/disklist | tr '\n' ' ')" # filter out extraneous options > /tmp_root_device_list while IFS= read -r -u 10 i; do # remove block device from list if numeric partitions exist DEV_NUM_PARTITIONS=$((`ls -1 $i* | wc -l`-1)) + DEBUG "detect_root_device: candidate $i has $DEV_NUM_PARTITIONS numeric partitions" if [ ${DEV_NUM_PARTITIONS} -eq 0 ]; then echo $i >> /tmp_root_device_list else @@ -381,16 +473,41 @@ detect_root_device() fi done 10/tmp/root-hashes-gui/lvm_vg 2>/dev/null; then - # It's not an LVM PV + # Try to query whether DEVICE is an LVM physical volume. 
On systems + # without LVM the command may not exist; treat that like "not a PV". + if ! lvm pvs --noheadings -o vg_name "$DEVICE" >/tmp/root-hashes-gui/lvm_vg 2>/tmp/root-hashes-gui/lvm_err; then + # It's not an LVM PV, or lvm failed entirely. Log stderr for debugging. + DEBUG "lvm pvs failed for $DEVICE, stderr:" "$(cat /tmp/root-hashes-gui/lvm_err)" + # try any children shown by lsblk (handles LUKS containers with + # internal partitions such as dm-0, dm-1 etc). + if command -v lsblk >/dev/null 2>&1; then + DEBUG "find_lvm_vg_name: lsblk children of $DEVICE" + for part in $(lsblk -np -l -o NAME "$DEVICE" | tail -n +2); do + [ -b "$part" ] || continue + DEBUG "find_lvm_vg_name: testing child $part" + if lvm pvs --noheadings -o vg_name "$part" >/tmp/root-hashes-gui/lvm_vg 2>/tmp/root-hashes-gui/lvm_err; then + VG="$(awk 'NF {print $1; exit}' /tmp/root-hashes-gui/lvm_vg)" + [ -n "$VG" ] && { echo "$VG"; return 0; } + fi + done + fi + DEBUG "find_lvm_vg_name: $DEVICE is not an LVM PV" return 1 fi