From 6a5456d7e8bd0cb9e7bd6fde05c4925e5c77e9dc Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 4 Feb 2026 14:28:39 +0100 Subject: [PATCH 1/4] Update NVIDIA driver symlink script --- .../nvidia/link_nvidia_host_libraries.sh | 146 ++++++------------ 1 file changed, 50 insertions(+), 96 deletions(-) diff --git a/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh index 8a6ee9b2..2fa3a9df 100755 --- a/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -448,27 +448,61 @@ find_cuda_libraries_on_host() { } # Actually symlinks the Matched libraries to correct folders. -# Then also creates "host" and "latest" folder symlinks symlink_mode () { # First let's make sure the driver libraries are not already in place # Have to link drivers = True link_drivers=1 - # Make sure that target of host_injections variant symlink is an existing directory - echo "Ensure host_injections directory" - host_injections_target=$(realpath -m "${EESSI_CVMFS_REPO}/host_injections") - log_verbose "host_injections_target: ${host_injections_target}" - if [ ! -d "$host_injections_target" ]; then + # Do some checks on existence of links and that we don't end up at /dev/null (the default), so we can print some informative information + # One downside is that we can't explicitely check if something is a variant symlink, so we'll just assume that if it's a link AND it + # lives in our CVMFS repository, it must be a variant symlink + nvidia_trusted_dir="${EESSI_EPREFIX}/lib/nvidia" + if [[ -L "$nvidia_trusted_dir" ]]; then + target1=$(readlink "$nvidia_trusted_dir") + log_verbose "$nvidia_trusted_dir is a CVMFS variant symlink (EESSI_${ESSSI_VERSION//./}_NVIDIA_OVERRIDE) currently pointing to $target1" + # If this is a link, and if it lives in the EESSI_CVMFS_REPO, we assume this is a variant symlink + if [[ -L "$target1" && "$target1" == "$EESSI_CVMFS_REPO"/* ]]; then + target2=$(readlink "$target1") + msg="${target1} appears to be a CVMFS variant symlink (EESSI_NVIDIA_OVERRIDE_DEFAULT) currently pointing to ${target2}." + msg="${msg} Proceeding to install host symlinks in ${target2}." + log_verbose "${msg}" + + # Check if target2 isn't /dev/null (the default target of the EESSI_NVIDIA_OVERRIDE_DEFAULT variant symlink) + # If it is, suggest setting EESSI_NVIDIA_OVERRIDE_DEFAULT or EESSI_${ESSSI_VERSION//./}_NVIDIA_OVERRIDE + if [[ $target2 == /dev/null ]]; then + msg="${nvidia_trusted_dir} is a symlink pointing to ${target1}, which is a symlink pointing to ${target2}\n" + msg="${msg}If you want to symlink the drivers in a single location for all EESSI versions, please define" + msg="${msg} the EESSI_NVIDIA_OVERRIDE_DEFAULT variant symlink in your local CVMFS configuration to point to" + msg="${msg} writeable location. This will change the target of symlink ${target1}.\n" + msg="${msg}If you want to symlink the drivers only for this version of EESSI (${EESSI_VERSION}), please define" + msg="${msg} the EESSI_${ESSSI_VERSION//./}_NVIDIA_OVERRIDE variant symlink in your local CVMFS configuration to point to" + msg="${msg} writeable location. This will change the target of symlink ${nvidia_trusted_dir}.\n" + fatal_error "${msg}" + fi + else + msg="$target1 does not seem to be a CVMFS variant symlink, suggesting that EESSI_${ESSSI_VERSION//./}_NVIDIA_OVERRIDE" + msg="${msg} was set in the CVMFS config. Proceeding to install host symlinks in $target1." + log_verbose "${msg}" + fi + else + msg="$nvidia_trusted_dir is expected to be a symlink, but it's not. This will likely fail" + msg="${msg} as CVMFS repositories are read-only. Proceeding anyway, but expect this to fail." + echo_yellow "${msg}" + fi + + # Make sure that target of nvidia_trusted_dir variant symlink is an existing directory + install_target=$(readlink -f "$nvidia_trusted_dir") + echo "Ensure the final target of ${nvidia_trusted_dir} (${install_target}) exists" + log_verbose "Target directory in which driver symlinks will be installed: ${install_target}" + if [ ! -d "$install_target" ]; then check_global_read - create_directory_structure "$host_injections_target" + if ! create_directory_structure "$install_target"; then + fatal_error "No write permissions to directory ${install_target}" + fi fi - # Define proper nvidia directory structure for host_injections in EESSI - host_injections_nvidia_dir="${EESSI_CVMFS_REPO}/host_injections/nvidia/${EESSI_CPU_FAMILY}" - host_injection_driver_dir="${host_injections_nvidia_dir}/host" - host_injection_driver_version_file="${host_injection_driver_dir}/driver_version.txt" - log_verbose "host_injections_nvidia_dir: ${host_injections_nvidia_dir}" - log_verbose "host_injection_driver_dir: ${host_injection_driver_dir}" + # Define file to store driver version that was symlinked + host_injection_driver_version_file="${install_target}/driver_version.txt" log_verbose "host_injection_driver_version_file: ${host_injection_driver_version_file}" # Check if drivers are already linked with correct version @@ -484,7 +518,7 @@ symlink_mode () { # There's something there but it is out of date echo_yellow "The host GPU driver libraries version have changed. Now its: (v${HOST_GPU_DRIVER_VERSION})" echo_yellow "Cleaning out outdated symlinks." - rm "${host_injection_driver_dir}"/* || fatal_error "Unable to remove files under '${host_injection_driver_dir}'." + rm "${install_target}"/* || fatal_error "Unable to remove files under '${install_target}'." fi fi @@ -496,14 +530,8 @@ symlink_mode () { # Have to link drivers if [ "$link_drivers" -eq 1 ]; then # Link the matched libraries - - echo_green "Linking drivers to the host_injection folder" - check_global_read - if ! create_directory_structure "${host_injection_driver_dir}" ; then - fatal_error "No write permissions to directory ${host_injection_driver_dir}" - fi - cd "${host_injection_driver_dir}" || fatal_error "Failed to cd to ${host_injection_driver_dir}" + cd "${install_target}" || fatal_error "Failed to cd to ${install_target}" log_verbose "Changed directory to: $PWD" # Make symlinks to all the interesting libraries @@ -541,80 +569,6 @@ symlink_mode () { drivers_linked=1 fi - - # Make latest symlink for NVIDIA drivers - cd "$host_injections_nvidia_dir" || fatal_error "Failed to cd to $host_injections_nvidia_dir" - log_verbose "Changed directory to: $PWD" - symlink="latest" - - # Check if the symlink exists - if [ -L "$symlink" ]; then - # If the drivers were linked this run - relink the symlink! - if [ "$drivers_linked" -eq 1 ]; then - # Force relinking the current link. - # Need to remove the link first, otherwise this will follow existing symlink - # and create host directory one level down ! - rm "$symlink" || fatal_error "Failed to remove symlink ${symlink}" - - if ln -sf host "$symlink" - then - echo "Successfully force recreated symlink between $symlink and host in $PWD" - else - fatal_error "Failed to force recreate symlink between $symlink and host in $PWD" - fi - fi - else - # If the symlink doesn't exists, create normal one. - if ln -s host "$symlink" - then - echo "Successfully created symlink between $symlink and host in $PWD" - else - fatal_error "Failed to create symlink between $symlink and host in $PWD" - fi - fi - - # Make sure the libraries can be found by the EESSI linker - host_injection_linker_dir=${EESSI_EPREFIX/versions/host_injections} - if [ -L "$host_injection_linker_dir/lib" ]; then - # Use readlink without -f to get direct symlink target - # using -f option will create "lastest" symlink one dir deeper (inside host) - target_path=$(readlink "$host_injection_linker_dir/lib") - expected_target="$host_injections_nvidia_dir/latest" - - log_verbose "Checking symlink target for EESSI linker:" - log_verbose "Current target: $target_path" - log_verbose "Expected target: $expected_target" - - # Update symlink if needed - if [ "$target_path" != "$expected_target" ]; then - cd "$host_injection_linker_dir" || fatal_error "Failed to cd to $host_injection_linker_dir" - log_verbose "Changed directory to: $PWD" - - - if ln -sf "$expected_target" lib - then - echo "Successfully force created symlink between $expected_target and lib in $PWD" - else - fatal_error "Failed to force create symlink between $expected_target and lib in $PWD" - fi - else - log_verbose "Symlink already points to correct target" - fi - else - # Just start from scratch, symlink doesn't exists. - check_global_read - create_directory_structure "$host_injection_linker_dir" - cd "$host_injection_linker_dir" || fatal_error "Failed to cd to $host_injection_linker_dir" - log_verbose "Changed directory to: $PWD" - - if ln -s "$host_injections_nvidia_dir/latest" lib - then - echo "Successfully created symlink between $host_injections_nvidia_dir/latest and lib in $PWD" - else - fatal_error "Failed to create symlink between $host_injections_nvidia_dir/latest and lib in $PWD" - fi - fi - } # Logging function for verbose mode @@ -635,7 +589,7 @@ check_eessi_initialised # Verify nvidia-smi availability log_verbose "Checking for nvidia-smi command..." -command -v nvidia-smi >/dev/null 2>&1 || { echo_yellow "nvidia-smi not found, this script won't do anything useful"; return 1; } +command -v nvidia-smi >/dev/null 2>&1 || { echo_yellow "nvidia-smi not found, this script won't do anything useful"; exit 1; } # Parse command line arguments while [[ "$#" -gt 0 ]]; do From 730b8ab906c65097a22d4e96cbc0512d2d7973d9 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> Date: Tue, 17 Feb 2026 15:09:34 +0100 Subject: [PATCH 2/4] Update scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bob Dröge --- scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh index 2fa3a9df..cbad9141 100755 --- a/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -475,7 +475,7 @@ symlink_mode () { msg="${msg} the EESSI_NVIDIA_OVERRIDE_DEFAULT variant symlink in your local CVMFS configuration to point to" msg="${msg} writeable location. This will change the target of symlink ${target1}.\n" msg="${msg}If you want to symlink the drivers only for this version of EESSI (${EESSI_VERSION}), please define" - msg="${msg} the EESSI_${ESSSI_VERSION//./}_NVIDIA_OVERRIDE variant symlink in your local CVMFS configuration to point to" + msg="${msg} the EESSI_${EESSI_VERSION//./}_NVIDIA_OVERRIDE variant symlink in your local CVMFS configuration to point to" msg="${msg} writeable location. This will change the target of symlink ${nvidia_trusted_dir}.\n" fatal_error "${msg}" fi From acef77ae0004ab3cbec1c6da92aa75221ba3753f Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> Date: Tue, 17 Feb 2026 15:12:27 +0100 Subject: [PATCH 3/4] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bob Dröge --- scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh index cbad9141..9e3d9552 100755 --- a/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -459,7 +459,7 @@ symlink_mode () { nvidia_trusted_dir="${EESSI_EPREFIX}/lib/nvidia" if [[ -L "$nvidia_trusted_dir" ]]; then target1=$(readlink "$nvidia_trusted_dir") - log_verbose "$nvidia_trusted_dir is a CVMFS variant symlink (EESSI_${ESSSI_VERSION//./}_NVIDIA_OVERRIDE) currently pointing to $target1" + log_verbose "$nvidia_trusted_dir is a CVMFS variant symlink (EESSI_${EESSI_VERSION//./}_NVIDIA_OVERRIDE) currently pointing to $target1" # If this is a link, and if it lives in the EESSI_CVMFS_REPO, we assume this is a variant symlink if [[ -L "$target1" && "$target1" == "$EESSI_CVMFS_REPO"/* ]]; then target2=$(readlink "$target1") @@ -468,7 +468,7 @@ symlink_mode () { log_verbose "${msg}" # Check if target2 isn't /dev/null (the default target of the EESSI_NVIDIA_OVERRIDE_DEFAULT variant symlink) - # If it is, suggest setting EESSI_NVIDIA_OVERRIDE_DEFAULT or EESSI_${ESSSI_VERSION//./}_NVIDIA_OVERRIDE + # If it is, suggest setting EESSI_NVIDIA_OVERRIDE_DEFAULT or EESSI_${EESSI_VERSION//./}_NVIDIA_OVERRIDE if [[ $target2 == /dev/null ]]; then msg="${nvidia_trusted_dir} is a symlink pointing to ${target1}, which is a symlink pointing to ${target2}\n" msg="${msg}If you want to symlink the drivers in a single location for all EESSI versions, please define" @@ -480,7 +480,7 @@ symlink_mode () { fatal_error "${msg}" fi else - msg="$target1 does not seem to be a CVMFS variant symlink, suggesting that EESSI_${ESSSI_VERSION//./}_NVIDIA_OVERRIDE" + msg="$target1 does not seem to be a CVMFS variant symlink, suggesting that EESSI_${EESSI_VERSION//./}_NVIDIA_OVERRIDE" msg="${msg} was set in the CVMFS config. Proceeding to install host symlinks in $target1." log_verbose "${msg}" fi From 5d1545614437ffb391d7378d682da47ecce5d498 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 18 Feb 2026 10:05:24 +0100 Subject: [PATCH 4/4] Preserve old symlinking logic, so that symlinking can also still be done for 2023.06 --- .../nvidia/link_nvidia_host_libraries.sh | 179 +++++++++++++++++- 1 file changed, 178 insertions(+), 1 deletion(-) diff --git a/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh index 2fa3a9df..9136e156 100755 --- a/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -447,6 +447,179 @@ find_cuda_libraries_on_host() { fi } +# Symlink structure changed from 2025.06 onwards. This function reflects the symlinking as it was done for EESSI 2023.06 +# Actually symlinks the Matched libraries to correct folders. +# Then also creates "host" and "latest" folder symlinks +symlink_mode_202306 () { + # First let's make sure the driver libraries are not already in place + # Have to link drivers = True + link_drivers=1 + + # Make sure that target of host_injections variant symlink is an existing directory + echo "Ensure host_injections directory" + host_injections_target=$(realpath -m "${EESSI_CVMFS_REPO}/host_injections") + log_verbose "host_injections_target: ${host_injections_target}" + if [ ! -d "$host_injections_target" ]; then + check_global_read + create_directory_structure "$host_injections_target" + fi + + # Define proper nvidia directory structure for host_injections in EESSI + host_injections_nvidia_dir="${EESSI_CVMFS_REPO}/host_injections/nvidia/${EESSI_CPU_FAMILY}" + host_injection_driver_dir="${host_injections_nvidia_dir}/host" + host_injection_driver_version_file="${host_injection_driver_dir}/driver_version.txt" + log_verbose "host_injections_nvidia_dir: ${host_injections_nvidia_dir}" + log_verbose "host_injection_driver_dir: ${host_injection_driver_dir}" + log_verbose "host_injection_driver_version_file: ${host_injection_driver_version_file}" + + # Check if drivers are already linked with correct version + # This is done by comparing host_injection_driver_version_file (driver_version.txt) + # This is needed when updating GPU drivers. + if [ -e "$host_injection_driver_version_file" ]; then + if grep -q "$HOST_GPU_DRIVER_VERSION" "$host_injection_driver_version_file"; then + echo_green "The host GPU driver libraries (v${HOST_GPU_DRIVER_VERSION}) have already been linked! (based on ${host_injection_driver_version_file})" + # The GPU libraries were already linked for this version of CUDA driver + # Have to link drivers = False + link_drivers=0 + else + # There's something there but it is out of date + echo_yellow "The host GPU driver libraries version have changed. Now its: (v${HOST_GPU_DRIVER_VERSION})" + echo_yellow "Cleaning out outdated symlinks." + rm "${host_injection_driver_dir}"/* || fatal_error "Unable to remove files under '${host_injection_driver_dir}'." + fi + fi + + # Link all matched_libraries from Nvidia to correct host_injection folder + # This step is only run, when linking of drivers is needed (eg. link_drivers==1) + # Setup variable to track if some drivers were actually linked this run. + drivers_linked=0 + + # Have to link drivers + if [ "$link_drivers" -eq 1 ]; then + # Link the matched libraries + + echo_green "Linking drivers to the host_injection folder" + check_global_read + if ! create_directory_structure "${host_injection_driver_dir}" ; then + fatal_error "No write permissions to directory ${host_injection_driver_dir}" + fi + + cd "${host_injection_driver_dir}" || fatal_error "Failed to cd to ${host_injection_driver_dir}" + log_verbose "Changed directory to: $PWD" + + # Make symlinks to all the interesting libraries + # Loop over each matched library + for library in "${MATCHED_LIBRARIES[@]}"; do + log_verbose "Linking library: ${library}" + + # Get just the library filename + lib_name=$(basename "$library") + + # Check if the symlink already exists + if [ -L "$lib_name" ]; then + # Check if it's pointing to the same target + target=$(readlink "$lib_name") + if [ "$target" = "$library" ]; then + log_verbose "Symlink for $lib_name already exists and points to correct target" + continue + else + log_verbose "Symlink for $lib_name exists but points to wrong target: $target, updating..." + rm "$lib_name" + fi + fi + + # Create a symlink in the current directory + # and check if the symlink was created successfully + if ! ln -s "$library" . + then + fatal_error "Error: Failed to create symlink for library $library in $PWD" + fi + done + + # Inject driver and CUDA versions into the directory + echo "$HOST_GPU_DRIVER_VERSION" > driver_version.txt + echo "$HOST_GPU_CUDA_VERSION" > cuda_version.txt + + drivers_linked=1 + fi + + # Make latest symlink for NVIDIA drivers + cd "$host_injections_nvidia_dir" || fatal_error "Failed to cd to $host_injections_nvidia_dir" + log_verbose "Changed directory to: $PWD" + symlink="latest" + + # Check if the symlink exists + if [ -L "$symlink" ]; then + # If the drivers were linked this run - relink the symlink! + if [ "$drivers_linked" -eq 1 ]; then + # Force relinking the current link. + # Need to remove the link first, otherwise this will follow existing symlink + # and create host directory one level down ! + rm "$symlink" || fatal_error "Failed to remove symlink ${symlink}" + + if ln -sf host "$symlink" + then + echo "Successfully force recreated symlink between $symlink and host in $PWD" + else + fatal_error "Failed to force recreate symlink between $symlink and host in $PWD" + fi + fi + else + # If the symlink doesn't exists, create normal one. + if ln -s host "$symlink" + then + echo "Successfully created symlink between $symlink and host in $PWD" + else + fatal_error "Failed to create symlink between $symlink and host in $PWD" + fi + fi + + # Make sure the libraries can be found by the EESSI linker + host_injection_linker_dir=${EESSI_EPREFIX/versions/host_injections} + if [ -L "$host_injection_linker_dir/lib" ]; then + # Use readlink without -f to get direct symlink target + # using -f option will create "lastest" symlink one dir deeper (inside host) + target_path=$(readlink "$host_injection_linker_dir/lib") + expected_target="$host_injections_nvidia_dir/latest" + + log_verbose "Checking symlink target for EESSI linker:" + log_verbose "Current target: $target_path" + log_verbose "Expected target: $expected_target" + + # Update symlink if needed + if [ "$target_path" != "$expected_target" ]; then + cd "$host_injection_linker_dir" || fatal_error "Failed to cd to $host_injection_linker_dir" + log_verbose "Changed directory to: $PWD" + + + if ln -sf "$expected_target" lib + then + echo "Successfully force created symlink between $expected_target and lib in $PWD" + else + fatal_error "Failed to force create symlink between $expected_target and lib in $PWD" + fi + else + log_verbose "Symlink already points to correct target" + fi + else + # Just start from scratch, symlink doesn't exists. + check_global_read + create_directory_structure "$host_injection_linker_dir" + cd "$host_injection_linker_dir" || fatal_error "Failed to cd to $host_injection_linker_dir" + log_verbose "Changed directory to: $PWD" + + if ln -s "$host_injections_nvidia_dir/latest" lib + then + echo "Successfully created symlink between $host_injections_nvidia_dir/latest and lib in $PWD" + else + fatal_error "Failed to create symlink between $host_injections_nvidia_dir/latest and lib in $PWD" + fi + fi + +} + + +# Symlink structure changed from 2025.06 onwards. This function reflects the new symlinking # Actually symlinks the Matched libraries to correct folders. symlink_mode () { # First let's make sure the driver libraries are not already in place @@ -639,7 +812,11 @@ fi # === 5b. Symlink Mode === # If we haven't already exited, we may need to create the symlinks -symlink_mode +if [ "$EESSI_VERSION" == '2023.06' ]; then + symlink_mode_202306 +else + symlink_mode +fi # If everything went OK, show success message echo_green "Host NVIDIA GPU drivers linked successfully for EESSI"