Skip to content
Open
247 changes: 183 additions & 64 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import os
import re
from typing import NamedTuple

import easybuild.tools.environment as env
from easybuild.easyblocks.generic.configuremake import obtain_config_guess
Expand Down Expand Up @@ -43,6 +44,8 @@
EESSI_RPATH_OVERRIDE_ATTR = 'orig_rpath_override_dirs'
EESSI_MODULE_ONLY_ATTR = 'orig_module_only'
EESSI_FORCE_ATTR = 'orig_force'
EESSI_SUPPORTED_MODULE_ATTR = 'eessi_supported_module'
EESSI_UNSUPPORTED_MODULE_ATTR = 'eessi_unsupported_module'

SYSTEM = EASYCONFIG_CONSTANTS['SYSTEM'][0]

Expand Down Expand Up @@ -71,6 +74,40 @@
{'name': 'lfoss', 'version': '2025b'}
)

# Supported compute capabilities by CUDA toolkit version
# Obtained by installing all CUDAs from 12.0.0 to 13.1.0, then using:

# #!/bin/bash
Copy link
Contributor Author

@casparvl casparvl Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's worth leaving this here as a breadcrumb to future contributors, since we'll have to update this list occasionally and doing it manually is silly - especially if you want to add compatibility for a range of toolkit versions

#
# CUDA_VERS=(12.0.0 12.1.0 12.1.1 12.2.0 12.2.2 12.3.0 12.3.2 12.4.0 12.5.0 12.6.0 12.8.0 12.9.0 12.9.1 13.0.0 13.0.1 13.0.2 13.1.0)
#
# for ver in ${CUDA_VERS[@]}; do
# module load CUDA/${ver}
# ccs=$(nvcc --list-gpu-arch)
# ccs=$(echo ${ccs} | sed "s/ /', /g" | sed "s/compute_/'/g")
# echo " '${ver}': [${ccs}'],"
# module unload CUDA
# done

# Compute capabilities reported by every CUDA 12.x toolkit listed below
_CUDA_12_COMMON_CCS = ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90']
# Compute capabilities reported by the CUDA 13.x toolkits listed below (kept in the order nvcc printed them)
_CUDA_13_CCS = ['75', '80', '86', '87', '88', '89', '90', '100', '110', '103', '120', '121']

# Groups of toolkit versions that report the same list of compute capabilities.
# When adding the output of the script above, put the new version in the group with the matching list
# (or add a new group).
_CUDA_CC_GROUPS = (
    (
        ('12.0.0', '12.1.0', '12.1.1', '12.2.0', '12.2.2', '12.3.0', '12.3.2', '12.4.0', '12.5.0', '12.6.0'),
        _CUDA_12_COMMON_CCS,
    ),
    (
        ('12.8.0',),
        _CUDA_12_COMMON_CCS + ['100', '101', '120'],
    ),
    (
        ('12.9.0', '12.9.1'),
        _CUDA_12_COMMON_CCS + ['100', '101', '103', '120', '121'],
    ),
    (
        ('13.0.0', '13.0.1', '13.0.2', '13.1.0'),
        _CUDA_13_CCS,
    ),
)

# Maps a CUDA toolkit version to the list of compute capabilities it supports (no dot, no suffix).
# Every version gets its own list object, so changing one entry never affects another.
CUDA_SUPPORTED_CCS = {
    toolkit_version: list(supported_ccs)
    for toolkit_versions, supported_ccs in _CUDA_CC_GROUPS
    for toolkit_version in toolkit_versions
}

# Ensure that we don't print any messages in --terse mode
# Note that --terse was introduced in EB 4.9.1
Expand Down Expand Up @@ -114,6 +151,55 @@ def is_gcccore_1220_based(**kwargs):
)



def get_cuda_version(ec, check_deps=True, check_builddeps=True):
    """
    Return the CUDA version that this EasyConfig (ec) uses as a (build)dependency.

    If ec is CUDA itself, its own version is the starting value. A CUDA entry found in the checked
    (build)dependencies overrides it; if several CUDA entries are found, the last one wins
    (builddependencies are scanned after dependencies).

    :param ec: EasyConfig-like object providing `name`, `version` and `asdict()`
    :param check_deps: whether to look for CUDA in ec's 'dependencies'
    :param check_builddeps: whether to look for CUDA in ec's 'builddependencies'
    :return: the CUDA version (as found in ec), or None if ec is not CUDA and has no CUDA (build)dependency
    """
    def _iter_deps(entries):
        """Yield dependency dicts, descending into nested lists/tuples of dependencies."""
        for entry in entries:
            if isinstance(entry, (list, tuple)):
                # EasyBuild can represent iterated builddependencies as a list of lists;
                # indexing such an entry with ['name'] would raise a TypeError
                yield from _iter_deps(entry)
            else:
                yield entry

    # Is this CUDA itself? Then its own version is the default answer
    cudaver = ec.version if ec.name == 'CUDA' else None

    # At this point, CUDA should be a builddependency due to inject_gpu_property
    # changing any CUDA dep to a builddependency. But, for robustness, just check both
    dep_keys = []
    if check_deps:
        dep_keys.append('dependencies')
    if check_builddeps:
        dep_keys.append('builddependencies')

    if dep_keys:
        ec_dict = ec.asdict()
        for dep_key in dep_keys:
            for dep in _iter_deps(ec_dict[dep_key]):
                if dep['name'] == 'CUDA':
                    cudaver = dep['version']

    return cudaver


def is_cuda_cc_supported_by_toolkit(cuda_cc, toolkit_version, supported_ccs=None):
    """
    Check if the CUDA Compute Capability passed in cuda_cc is supported by the CUDA toolkit version toolkit_version.

    :param cuda_cc: compute capability in EasyBuild notation, e.g. '9.0' or '9.0a'
    :param toolkit_version: CUDA toolkit version; must be a key of the lookup table (e.g. '12.1.1')
    :param supported_ccs: lookup table mapping toolkit version to a list of supported compute capabilities
                          (without dot or suffix, e.g. '90'); defaults to CUDA_SUPPORTED_CCS
    :return: True if supported, False if not supported
    :raises KeyError: if toolkit_version is not present in the lookup table
    """
    if supported_ccs is None:
        supported_ccs = CUDA_SUPPORTED_CCS

    if toolkit_version not in supported_ccs:
        known_versions = ', '.join(supported_ccs)
        raise KeyError(
            f"CUDA toolkit version {toolkit_version} is not in the table of supported compute capabilities "
            f"(known versions: {known_versions}); add it to CUDA_SUPPORTED_CCS"
        )

    # The lookup table contains compute capabilities in the format '90', '100', etc: no dot and no suffix.
    # The compute capabilities passed to EasyBuild contain a dot and can carry a suffix (like the 'a' in '9.0a').
    # So to compare, first strip the suffix: the regex expects one or more digits, a dot, one or more digits,
    # and then optionally any number of letters. Only the first capture group (the digits and dot) is kept.
    cuda_cc = re.sub(r'^(\d+\.\d+)[a-zA-Z]*$', r'\1', cuda_cc)

    # Strip the dot
    cuda_cc = cuda_cc.replace('.', '')

    return cuda_cc in supported_ccs[toolkit_version]


def get_eessi_envvar(eessi_envvar):
"""Get an EESSI environment variable from the environment"""

Expand Down Expand Up @@ -155,11 +241,6 @@ def parse_hook(ec, *args, **kwargs):
if ec.name in PARSE_HOOKS:
PARSE_HOOKS[ec.name](ec, eprefix)

# Always trigger this one, regardless of ec.name
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if cpu_target == CPU_TARGET_ZEN4:
parse_hook_zen4_module_only(ec, eprefix)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now handled in the pre_module_hook_unsupported_modules.


# inject the GPU property (if required)
ec = inject_gpu_property(ec)

Expand Down Expand Up @@ -288,6 +369,22 @@ def post_ready_hook(self, *args, **kwargs):
print_msg(msg % (new_parallel, curr_parallel, session_parallel, self.name, cpu_target), log=self.log)


def pre_prepare_hook_unsupported_modules(self, *args, **kwargs):
    """
    Set the env var that ignores specific LmodErrors from dependencies if this module is known to be unsupported.

    Generic replacement for the former pre_prepare_hook_ignore_zen4_gcccore1220_error hook: the environment
    variable to set is taken from the UnsupportedModule stored on `self` by is_unsupported_module.
    """
    # Nothing to do for supported modules
    if not is_unsupported_module(self):
        return

    unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR)
    print_msg(f"Setting {unsup_mod.envvar} to allow loading dependencies that otherwise throw an LmodError")
    os.environ[unsup_mod.envvar] = "1"


def post_prepare_hook_unsupported_modules(self, *args, **kwargs):
    """
    Unset the env var that ignores specific LmodErrors from dependencies if this module is known to be unsupported.

    Generic replacement for the former post_prepare_hook_ignore_zen4_gcccore1220_error hook: the environment
    variable to unset is taken from the UnsupportedModule stored on `self` by is_unsupported_module.
    """
    if is_unsupported_module(self):
        unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR)
        print_msg(f"Unsetting {unsup_mod.envvar}")
        # Use pop() with a default rather than del: the desired end state is "variable not set",
        # so don't fail with a KeyError if it is no longer present in the environment
        os.environ.pop(unsup_mod.envvar, None)


def pre_prepare_hook(self, *args, **kwargs):
"""Main pre-prepare hook: trigger custom functions."""

Expand Down Expand Up @@ -318,10 +415,8 @@ def pre_prepare_hook(self, *args, **kwargs):
if self.name in PRE_PREPARE_HOOKS:
PRE_PREPARE_HOOKS[self.name](self, *args, **kwargs)

# Always trigger this one, regardless of ec.name
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if cpu_target == CPU_TARGET_ZEN4:
pre_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs)
# Always trigger this, regardless of ec.name
pre_prepare_hook_unsupported_modules(self, *args, **kwargs)
Copy link
Contributor Author

@casparvl casparvl Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Run the new hook instead of the old one. All the logic to check if something is an unsupported module is now contained within is_unsupported_module, so no more use for checking the cpu_target.



def post_prepare_hook_gcc_prefixed_ld_rpath_wrapper(self, *args, **kwargs):
Expand Down Expand Up @@ -387,10 +482,8 @@ def post_prepare_hook(self, *args, **kwargs):
if self.name in POST_PREPARE_HOOKS:
POST_PREPARE_HOOKS[self.name](self, *args, **kwargs)

# Always trigger this one, regardless of ec.name
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if cpu_target == CPU_TARGET_ZEN4:
post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs)
# Always trigger this, regardless of ec.name
post_prepare_hook_unsupported_modules(self, *args, **kwargs)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Run the new hook instead of the old one. All the logic to check if something is an unsupported module is now contained within is_unsupported_module, so no more use for checking the cpu_target.



def parse_hook_casacore_disable_vectorize(ec, eprefix):
Expand Down Expand Up @@ -556,24 +649,6 @@ def parse_hook_freeimage_aarch64(ec, *args, **kwargs):
print_msg("Changed toolchainopts for %s: %s", ec.name, ec['toolchainopts'])


def parse_hook_zen4_module_only(ec, eprefix):
Copy link
Contributor Author

@casparvl casparvl Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding the LmodError to the modluafooter is now done in the generic pre_module_hook_unsupported_module hook

"""
Use --force --module-only if building a foss-2022b-based EasyConfig for Zen4.
This toolchain will not be supported on Zen4, so we will generate a modulefile
and have it print an LmodError.
"""
if is_gcccore_1220_based(ecname=ec['name'], ecversion=ec['version'], tcname=ec['toolchain']['name'],
tcversion=ec['toolchain']['version']):
env_varname = EESSI_IGNORE_ZEN4_GCC1220_ENVVAR
# TODO: create a docs page to which we can refer for more info here
# TODO: then update the link to the known issues page to the _specific_ issue
# Need to escape the newline character so that the newline character actually ends up in the module file
# (otherwise, it splits the string, and a 2-line string ends up in the modulefile, resulting in syntax error)
errmsg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported for the Zen4 architecture.\\n"
errmsg += "See https://www.eessi.io/docs/known_issues/eessi-<EESSI_VERSION>/#gcc-1220-and-foss-2022b-based-modules-cannot-be-loaded-on-zen4-architecture"
ec['modluafooter'] = 'if (not os.getenv("%s")) then LmodError("%s") end' % (env_varname, errmsg)


def pre_fetch_hook(self, *args, **kwargs):
"""Main pre fetch hook: trigger custom functions based on software name."""
if self.name in PRE_FETCH_HOOKS:
Expand Down Expand Up @@ -614,17 +689,75 @@ def pre_fetch_hook_check_installation_path(self, *args, **kwargs):
)


def is_unsupported_module(ec):
class UnsupportedModule(NamedTuple):
    """
    Environment variable and error message that belong to an unsupported module.

    A named tuple is used so that both values are accessed through clearly named attributes, instead of
    having to remember which element of a regular tuple holds which value.
    """
    # Name of the environment variable that needs to be set to ignore the LmodError
    # that this unsupported module would otherwise generate
    envvar: str
    # The actual LmodError message that should be printed
    errmsg: str


def is_unsupported_module(self):
    """
    Determine if the given module is unsupported in EESSI, and hence if a dummy module needs to be built
    that just prints an LmodError.

    The outcome is stored on `self`, so that repeated calls from different hooks return early:
    - If a module is unsupported, this function sets the EESSI_UNSUPPORTED_MODULE_ATTR attribute on `self`
      and assigns an `UnsupportedModule` NamedTuple to it (environment variable that can be used to ignore
      the LmodError, and the LmodError message itself).
    - If a module is supported, this function sets the EESSI_SUPPORTED_MODULE_ATTR attribute on `self`
      (and sets it to True).

    :param self: object passed to the step hooks; its name, version, toolchain and cfg are inspected
    :return: True if the module is unsupported, False otherwise
    """
    # If this function was already called by an earlier hook, evaluation of whether this is an unsupported
    # module was already done. No need to redo it: save time and return early
    if hasattr(self, EESSI_SUPPORTED_MODULE_ATTR):
        return False
    if hasattr(self, EESSI_UNSUPPORTED_MODULE_ATTR):
        return True

    # To mark more modules as unsupported, add a check function to this tuple. A check typically:
    #   - contains the logic to determine if this is an unsupported module
    #   - prints a warning to make clear something out-of-the-ordinary happens in this installation
    #   - defines the LmodError message that should be embedded in the modulefile
    #   - defines the environment variable name that can be used to suppress the LmodError
    # and returns an UnsupportedModule if its case applies, or None otherwise. The first match wins.
    checks = (
        _unsupported_gcccore_1220_on_zen4,
        _unsupported_cuda_compute_capability,
    )
    for check in checks:
        unsup_mod = check(self)
        if unsup_mod is not None:
            setattr(self, EESSI_UNSUPPORTED_MODULE_ATTR, unsup_mod)
            return True

    # If all the above logic passed, this module is supported
    setattr(self, EESSI_SUPPORTED_MODULE_ATTR, True)
    return False


def _unsupported_gcccore_1220_on_zen4(self):
    """Return an UnsupportedModule for GCCcore-12.2.0 based builds targeting Zen4 (foss-2022b), else None."""
    cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
    if cpu_target != CPU_TARGET_ZEN4:
        return None
    if not is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name,
                                 tcversion=self.toolchain.version):
        return None

    msg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported on Zen4 architectures. "
    msg += "Building with '--module-only --force' and injecting an LmodError into the modulefile."
    print_warning(msg)

    # The newline is escaped so that the literal characters '\n' end up in the module file
    errmsg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported for the Zen4 architecture.\\n"
    errmsg += "See https://www.eessi.io/docs/known_issues/eessi-<EESSI_VERSION>/#gcc-1220-and-foss-2022b-based-modules-cannot-be-loaded-on-zen4-architecture"
    return UnsupportedModule(envvar=EESSI_IGNORE_ZEN4_GCC1220_ENVVAR, errmsg=errmsg)


def _unsupported_cuda_compute_capability(self):
    """Return an UnsupportedModule if the CUDA toolkit used lacks a requested compute capability, else None."""
    # Only relevant if the CUDA toolkit is a (build)dependency
    cudaver = get_cuda_version(ec=self.cfg, check_deps=True, check_builddeps=True)
    if not cudaver:
        return None

    # cuda_ccs_string is e.g. "8.0,9.0"; it is empty if no compute capabilities are defined
    cuda_ccs_string = self.cfg.get_cuda_cc_template_value('cuda_compute_capabilities', required=False)
    if not cuda_ccs_string:
        return None

    # cuda_ccs_string is a comma-separated string. Convert to list for easier handling
    cuda_ccs = cuda_ccs_string.split(',')
    # Supported only if every requested compute capability is supported by this toolkit version
    if all(is_cuda_cc_supported_by_toolkit(cuda_cc=cuda_cc, toolkit_version=cudaver) for cuda_cc in cuda_ccs):
        return None

    msg = f"Requested a CUDA Compute Capability ({cuda_ccs}) that is not supported by the CUDA "
    msg += f"toolkit version ({cudaver}) used by this software. Switching to '--module-only --force' "
    msg += "and injecting an LmodError into the modulefile."
    print_warning(msg)

    # Use a normalized variable name for the CUDA ccs: strip any suffix, and replace commas
    normalized_ccs = re.sub(r'[a-zA-Z]', '', cuda_ccs_string).replace(',', '_')
    # Also replace periods, those are not officially supported in environment variable names
    var = f"EESSI_IGNORE_CUDA_{cudaver}_CC_{normalized_ccs}".replace('.', '_')
    errmsg = f"EasyConfigs using CUDA {cudaver} or older are not supported for (all) requested Compute "
    errmsg += f"Capabilities: {cuda_ccs}.\\n"
    return UnsupportedModule(envvar=var, errmsg=errmsg)


Expand All @@ -651,18 +784,21 @@ def pre_fetch_hook_unsupported_modules(self, *args, **kwargs):

def pre_module_hook_unsupported_module(self, *args, **kwargs):
    """
    Make an unsupported module load-able during the module step, and embed the LmodError in its modulefile.

    For an unsupported module (see is_unsupported_module), this:
    - sets the environment variable that suppresses the LmodError in `self.initial_environ` (if present);
    - adds a Lua snippet to the 'modluafooter' easyconfig parameter that raises the LmodError unless that
      environment variable is set.
    """
    if not is_unsupported_module(self):
        return

    # The UnsupportedModule tuple holds the environment variable name and the LmodError message
    unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR)

    if hasattr(self, 'initial_environ'):
        # Allow the module to be loaded in the module step (which uses initial environment)
        print_msg(f"Setting {unsup_mod.envvar} in initial environment")
        self.initial_environ[unsup_mod.envvar] = "1"

    # The modulefile must carry the LmodError regardless of whether initial_environ is available
    extra_footer = 'if (not os.getenv("%s")) then LmodError("%s") end' % (unsup_mod.envvar, unsup_mod.errmsg)
    # Append extra_footer if a modluafooter already exists. Otherwise, simply assign
    current_footer = self.cfg['modluafooter']
    if current_footer:
        self.cfg['modluafooter'] = current_footer + '\n' + extra_footer
    else:
        self.cfg['modluafooter'] = extra_footer


def post_module_hook_unsupported_module(self, *args, **kwargs):
"""Revert changes from pre_fetch_hook_unsupported_modules"""
ignore_lmoderror_envvar = is_unsupported_module(self)
if ignore_lmoderror_envvar:
if is_unsupported_module(self):
unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR)
if hasattr(self, EESSI_MODULE_ONLY_ATTR):
update_build_option('module_only', getattr(self, EESSI_MODULE_ONLY_ATTR))
print_msg("Restored original build option 'module_only' to %s" % getattr(self, EESSI_MODULE_ONLY_ATTR))
Expand All @@ -679,9 +815,9 @@ def post_module_hook_unsupported_module(self, *args, **kwargs):

# If the variable to allow loading is set, remove it
if hasattr(self, 'initial_environ'):
if self.initial_environ.get(ignore_lmoderror_envvar, False):
print_msg(f"Removing {ignore_lmoderror_envvar} in initial environment")
del self.initial_environ[ignore_lmoderror_envvar]
if self.initial_environ.get(unsup_mod.envvar, False):
print_msg(f"Removing {unsup_mod.envvar} in initial environment")
del self.initial_environ[unsup_mod.envvar]


def post_easyblock_hook_copy_easybuild_subdir(self, *args, **kwargs):
Expand All @@ -698,23 +834,6 @@ def post_easyblock_hook_copy_easybuild_subdir(self, *args, **kwargs):
copy_dir(app_easybuild_dir, app_reprod_dir)


# Modules for dependencies are loaded in the prepare step. Thus, that's where we need this variable to be set
# so that the modules can be successfully loaded without printing the error (so that we can create a module
# _with_ the warning for the current software being installed)
def pre_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaced by generic pre_prepare_hook_unsupported_modules

"""Set environment variable to ignore the LmodError from parse_hook_zen4_module_only during build phase"""
if is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name,
tcversion=self.toolchain.version):
os.environ[EESSI_IGNORE_ZEN4_GCC1220_ENVVAR] = "1"


def post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaced by generic post_prepare_hook_unsupported_modules

"""Unset environment variable to ignore the LmodError from parse_hook_zen4_module_only during build phase"""
if is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name,
tcversion=self.toolchain.version):
del os.environ[EESSI_IGNORE_ZEN4_GCC1220_ENVVAR]


def pre_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwargs):
"""
Solve issues with compiling or running the tests on both
Expand Down
Loading