Commit 03219e0

add single method to determine gpu software

1 parent: 0b16e7a
File tree

1 file changed (+30, -17 lines)

src/build_tools/hooks_hydra.py

Lines changed: 30 additions & 17 deletions
@@ -83,6 +83,10 @@
 SUFFIX_MODULES_PATH = 'collection'
 SUFFIX_MODULES_SYMLINK = 'all'
 
+##################
+# MODULE FOOTERS #
+##################
+
 INTEL_MPI_MOD_FOOTER = """
 if ( os.getenv("SLURM_JOB_ID") ) then
     setenv("I_MPI_HYDRA_BOOTSTRAP", "slurm")
@@ -97,6 +101,17 @@
     setenv("JAVA_TOOL_OPTIONS", "-Xmx" .. math.floor(mem*0.8))
 end
 """
+GPU_DUMMY_MOD_FOOTER = """
+if mode() == "load" and not os.getenv("BUILD_TOOLS_LOAD_DUMMY_MODULES") then
+    LmodError([[
+This module is only available on nodes with a GPU.
+Jobs can request GPUs with the command 'srun --gpus-per-node=1' or 'sbatch --gpus-per-node=1'.
+
+More information in the VUB-HPC docs:
+https://hpc.vub.be/docs/job-submission/gpu-job-types/#gpu-jobs
+]])
+end
+"""
 
 
 def get_group(name, version):
@@ -178,6 +193,17 @@ def calc_tc_gen_subdir(name, version, tcname, tcversion, easyblock):
     return False, log_msg
 
 
+def is_gpu_software(ec):
+    "determine if it is a GPU-only installation"
+    gpu_components = ['CUDA']
+    gpu_toolchains = ['nvidia-compilers', 'NVHPC']
+
+    is_gpu_package = ec.name in gpu_components or ec.name in gpu_toolchains
+    needs_gpu_toolchain = ec.toolchain.name in gpu_toolchains
+    needs_gpu_component = any([x in ec['versionsuffix'] for x in gpu_components])
+
+    return is_gpu_package or needs_gpu_toolchain or needs_gpu_component
+
 def update_moduleclass(ec):
     "update the moduleclass of an easyconfig to <tc_gen>/all"
     tc_gen, log_msg = calc_tc_gen_subdir(
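
For context, a minimal sketch of how the new helper classifies easyconfigs. The FakeEC stand-in below is hypothetical and not part of the commit; it only mimics the two access patterns the helper relies on (name/toolchain as attributes, parameters such as versionsuffix via item access), assuming is_gpu_software is importable from build_tools.hooks_hydra.

    from types import SimpleNamespace

    from build_tools.hooks_hydra import is_gpu_software  # assumed import path

    class FakeEC(SimpleNamespace):
        "hypothetical test double: attribute access plus item access for parameters"
        def __getitem__(self, key):
            return getattr(self, key)

    examples = [
        # CUDA itself is a GPU package by name
        FakeEC(name='CUDA', toolchain=SimpleNamespace(name='system'), versionsuffix=''),
        # software built with the NVHPC toolchain needs a GPU toolchain
        FakeEC(name='OSU-Micro-Benchmarks', toolchain=SimpleNamespace(name='NVHPC'), versionsuffix=''),
        # a '-CUDA-...' versionsuffix marks a GPU component
        FakeEC(name='GROMACS', toolchain=SimpleNamespace(name='foss'), versionsuffix='-CUDA-12.1.1'),
        # plain CPU software matches none of the checks
        FakeEC(name='GCC', toolchain=SimpleNamespace(name='system'), versionsuffix=''),
    ]

    for ec in examples:
        print(ec.name, is_gpu_software(ec))  # -> True, True, True, False
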
@@ -326,10 +352,7 @@ def parse_hook(ec, *args, **kwargs):  # pylint: disable=unused-argument
         ec.log.info(f"[parse hook] Set optarch in parameter toolchainopts: {ec.toolchain.options['optarch']}")
 
     # skip installation of CUDA software in non-GPU architectures, only create module file
-    is_cuda_software = 'CUDA' in ec.name or 'CUDA' in ec['versionsuffix']
-    cuda_tcs = ['CUDA', 'nvidia-compilers', 'NVHPC']
-    is_cuda_software = ec.name in cuda_tcs or ec.toolchain.name in cuda_tcs or 'CUDA' in ec['versionsuffix']
-    if is_cuda_software and LOCAL_ARCH_FULL not in GPU_ARCHS:
+    if is_gpu_software(ec) and LOCAL_ARCH_FULL not in GPU_ARCHS:
         # only install the module file in non-GPU nodes
         # module_only steps: [MODULE_STEP, PREPARE_STEP, READY_STEP, POSTITER_STEP, SANITYCHECK_STEP]
         ec['module_only'] = True
@@ -338,7 +361,7 @@ def parse_hook(ec, *args, **kwargs):  # pylint: disable=unused-argument
         ec.log.info(f"[parse hook] Set parameter skipsteps: {ec['skipsteps']}")
 
     # set cuda compute capabilities
-    elif is_cuda_software:
+    elif is_gpu_software(ec):
         # on GPU nodes set cuda compute capabilities
         ec['cuda_compute_capabilities'] = ARCHS[LOCAL_ARCH_FULL]['cuda_cc']
         ec.log.info(f"[parse hook] Set parameter cuda_compute_capabilities: {ec['cuda_compute_capabilities']}")
@@ -579,19 +602,9 @@ def pre_module_hook(self, *args, **kwargs):  # pylint: disable=unused-argument
     # ------ DUMMY MODULES -------- #
     #################################
 
-    is_cuda_software = 'CUDA' in self.name or 'CUDA' in self.cfg['versionsuffix']
-    if is_cuda_software and LOCAL_ARCH_FULL not in GPU_ARCHS:
+    if is_gpu_software(self) and LOCAL_ARCH_FULL not in GPU_ARCHS:
         self.log.info("[pre-module hook] Creating dummy module for CUDA modules on non-GPU nodes")
-        self.cfg['modluafooter'] = """
-if mode() == "load" and not os.getenv("BUILD_TOOLS_LOAD_DUMMY_MODULES") then
-    LmodError([[
-This module is only available on nodes with a GPU.
-Jobs can request GPUs with the command 'srun --gpus-per-node=1' or 'sbatch --gpus-per-node=1'.
-
-More information in the VUB-HPC docs:
-https://hpc.vub.be/docs/job-submission/gpu-job-types/#gpu-jobs
-]])
-end"""
+        self.cfg['modluafooter'] = GPU_DUMMY_MOD_FOOTER
 
 
 def post_build_and_install_loop_hook(ecs_with_res):
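
Taken together, the hooks now branch on this single predicate. The function below is a hypothetical condensation of the GPU branches in parse_hook and pre_module_hook shown above; it is not part of the commit, and it reuses the existing names LOCAL_ARCH_FULL, GPU_ARCHS, ARCHS and GPU_DUMMY_MOD_FOOTER from hooks_hydra.py.

    def handle_gpu_easyconfig(ec):
        "hypothetical condensation of the GPU branches in parse_hook/pre_module_hook"
        if not is_gpu_software(ec):
            return
        if LOCAL_ARCH_FULL not in GPU_ARCHS:
            # non-GPU node: only generate the module file and make it a dummy
            # that errors on load unless BUILD_TOOLS_LOAD_DUMMY_MODULES is set
            ec['module_only'] = True
            ec['modluafooter'] = GPU_DUMMY_MOD_FOOTER
        else:
            # GPU node: pass the compute capabilities of the local GPU architecture
            ec['cuda_compute_capabilities'] = ARCHS[LOCAL_ARCH_FULL]['cuda_cc']
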
