From f90f87eb8f21421628d7ba1ac96b84cb01d9719c Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 11:40:01 +0100 Subject: [PATCH 01/18] add code for generating site-packages --- .../available_software/available_software.py | 74 ++++++++++++++++--- 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py index f04744dcd5fa..20bdb653535d 100644 --- a/scripts/available_software/available_software.py +++ b/scripts/available_software/available_software.py @@ -34,6 +34,8 @@ import re import subprocess import time +import math +from glob import glob from pathlib import Path from typing import Union, Tuple import numpy as np @@ -59,9 +61,9 @@ def main(): ) path_data_dir = os.path.join(root_dir, "mkdocs/docs/HPC/only/gent/available_software/data") - # Generate the JSON overviews and detail markdown pages. + # Generate the JSON overviews if args.eessi: - modules = modules_eesi() + modules = modules_eessi() else: modules = modules_ugent() @@ -69,9 +71,18 @@ def main(): print("Generate JSON overview... ", end="", flush=True) generate_json_overview(modules, path_data_dir) print("Done!") + + # Generate the JSON detail + json_data = generate_json_detailed_data(modules) + if args.eessi: + json_data = json_data + else: + json_data = get_site_packages_ugent(json_data) print("Generate JSON detailed... ", end="", flush=True) - json_path = generate_json_detailed(modules, path_data_dir) + json_path = generate_json_detailed(json_data, path_data_dir) print("Done!") + + # Generate detail markdown pages print("Generate detailed pages... ", end="", flush=True) generate_detail_pages(json_path, os.path.join(root_dir, "mkdocs/docs/HPC/only/gent/available_software/detail")) print("Done!") @@ -199,7 +210,7 @@ def clusters_eessi() -> np.ndarray: return clusters -def modules_eesi() -> dict: +def modules_eessi() -> dict: """ Returns names of all software module that are installed on EESSI. They are grouped by cluster. @@ -259,6 +270,36 @@ def clusters_ugent() -> np.ndarray: return module_avail(name="cluster/", filter_fn=filter_fn_gent_cluster) +def get_site_packages_ugent(json_data) -> dict: + """ + Add a list of site-packages to all python packages + @return: Dictionary with all the modules and their site_packages + """ + clusters = json_data['clusters'] + modules = json_data['software'] + path_mapping = { + "doduo": "/apps/gent/RHEL8/zen2-ib/", + "accelgor": "/apps/gent/RHEL8/zen3-ampere-ib/", + "donphan": "/apps/gent/RHEL8/cascadelake-ampere-ib/", + "gallade": "/apps/gent/RHEL8/zen3x-ib/", + "joltik": "/apps/gent/RHEL8/cascadelake-volta-ib/", + "skitty": "/apps/gent/RHEL8/skylake-ib/", + } + + for software, details in modules.items(): + for mod in modules[software]['versions']: + cluster = modules[software]['versions'][mod]['clusters'][0] + base_path = path_mapping[cluster] + "software/" + mod + path = base_path + "/lib/python*/site-packages/*" + site_packages = glob(path) + if site_packages != []: + site_packages = [os.path.basename(x) for x in site_packages] + site_packages = [s for s in site_packages if not "." in s] + json_data["software"][software]["versions"][mod]["site_packages"] = site_packages + + return json_data + + def modules_ugent() -> dict: """ Returns names of all software module that are installed on the HPC on UGent. @@ -343,7 +384,7 @@ def generate_software_table_data(software_data: dict, clusters: list) -> list: row = [module_name] for cluster in clusters: - row += ("x" if cluster in available else "-") + row += ("x" if cluster in available["clusters"] else "-") table_data += row return table_data @@ -386,6 +427,20 @@ def generate_software_detail_page( text=generate_software_table_data(sorted_versions, clusters) ) + for version, details in list(sorted_versions.items())[::-1]: + print(details) + if 'site_packages' in details: + print(version) + md_file.new_paragraph(f"### {version}") + md_file.new_paragraph("This is a list of site-packages included in the module:") + packages = "" + for i, package in enumerate(details['site_packages']): + if i != len(details['site_packages']) -1: + packages += f"{package}, " + else: + packages += f"{package}" + md_file.new_paragraph(f"{packages}") + md_file.create_md_file() # Remove the TOC @@ -570,20 +625,20 @@ def generate_json_detailed_data(modules: dict) -> dict: # If the version is not yet present, add it. if mod not in json_data["software"][software]["versions"]: - json_data["software"][software]["versions"][mod] = [] + json_data["software"][software]["versions"][mod] = {'clusters': []} # If the cluster is not yet present, add it. if cluster not in json_data["software"][software]["clusters"]: json_data["software"][software]["clusters"].append(cluster) # If the cluster is not yet present, add it. - if cluster not in json_data["software"][software]["versions"][mod]: - json_data["software"][software]["versions"][mod].append(cluster) + if cluster not in json_data["software"][software]["versions"][mod]["clusters"]: + json_data["software"][software]["versions"][mod]["clusters"].append(cluster) return json_data -def generate_json_detailed(modules: dict, path_data_dir: str) -> str: +def generate_json_detailed(json_data: dict, path_data_dir: str) -> str: """ Generate the detailed JSON. @@ -591,7 +646,6 @@ def generate_json_detailed(modules: dict, path_data_dir: str) -> str: @param path_data_dir: Path to the directory where the JSON will be placed. @return: Absolute path to the json file. """ - json_data = generate_json_detailed_data(modules) filepath = os.path.join(path_data_dir, "json_data_detail.json") with open(filepath, 'w') as outfile: json.dump(json_data, outfile) From 7b418de138b7490d23d91ea0227bdc007a0ccd10 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 11:50:06 +0100 Subject: [PATCH 02/18] remove unecessary print statements --- scripts/available_software/available_software.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py index 20bdb653535d..992b1536c9bb 100644 --- a/scripts/available_software/available_software.py +++ b/scripts/available_software/available_software.py @@ -428,9 +428,7 @@ def generate_software_detail_page( ) for version, details in list(sorted_versions.items())[::-1]: - print(details) if 'site_packages' in details: - print(version) md_file.new_paragraph(f"### {version}") md_file.new_paragraph("This is a list of site-packages included in the module:") packages = "" From c7e9f7b8fdadfcea7dac52c59fd6f52476f227c1 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 14:05:41 +0100 Subject: [PATCH 03/18] get software paths --- scripts/available_software/available_software.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py index 992b1536c9bb..0eaf58c7c6bc 100644 --- a/scripts/available_software/available_software.py +++ b/scripts/available_software/available_software.py @@ -65,8 +65,9 @@ def main(): if args.eessi: modules = modules_eessi() else: - modules = modules_ugent() + modules, paths = modules_ugent() + print(paths) print(modules) print("Generate JSON overview... ", end="", flush=True) generate_json_overview(modules, path_data_dir) @@ -261,6 +262,15 @@ def filter_fn_gent_modules(data: np.ndarray) -> np.ndarray: ] +def filter_fn_gent_software_path(data: np.ndarray) -> np.ndarray: + """ + Filter function for the software path of the cluster + @param data: Output + @return: Filtered output + """ + return data[np.char.endswith(data, "/modules/all:")] + + def clusters_ugent() -> np.ndarray: """ Returns all the cluster names of the HPC at UGent. @@ -308,15 +318,17 @@ def modules_ugent() -> dict: """ print("Start collecting modules:") data = {} + mapping = {} for cluster in clusters_ugent(): print(f"\t Collecting available modules for {cluster}... ", end="", flush=True) module_swap(cluster) cluster_name = cluster.split("/", maxsplit=1)[1] + mapping[cluster_name] = module_avail(filter_fn=filter_fn_gent_software_path) data[cluster_name] = module_avail(filter_fn=filter_fn_gent_modules) print(f"found {len(data[cluster_name])} modules!") print("All data collected!\n") - return data + return data, mapping # -------------------------------------------------------------------------------------------------------- From ee2226c8e64a68bc539faa9411334d926a63f76b Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 14:30:33 +0100 Subject: [PATCH 04/18] remove hardcoded mapping --- scripts/available_software/available_software.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py index 0eaf58c7c6bc..2be0dca5c673 100644 --- a/scripts/available_software/available_software.py +++ b/scripts/available_software/available_software.py @@ -78,7 +78,7 @@ def main(): if args.eessi: json_data = json_data else: - json_data = get_site_packages_ugent(json_data) + json_data = get_site_packages_ugent(json_data, paths) print("Generate JSON detailed... ", end="", flush=True) json_path = generate_json_detailed(json_data, path_data_dir) print("Done!") @@ -280,26 +280,17 @@ def clusters_ugent() -> np.ndarray: return module_avail(name="cluster/", filter_fn=filter_fn_gent_cluster) -def get_site_packages_ugent(json_data) -> dict: +def get_site_packages_ugent(json_data, paths) -> dict: """ Add a list of site-packages to all python packages @return: Dictionary with all the modules and their site_packages """ - clusters = json_data['clusters'] modules = json_data['software'] - path_mapping = { - "doduo": "/apps/gent/RHEL8/zen2-ib/", - "accelgor": "/apps/gent/RHEL8/zen3-ampere-ib/", - "donphan": "/apps/gent/RHEL8/cascadelake-ampere-ib/", - "gallade": "/apps/gent/RHEL8/zen3x-ib/", - "joltik": "/apps/gent/RHEL8/cascadelake-volta-ib/", - "skitty": "/apps/gent/RHEL8/skylake-ib/", - } for software, details in modules.items(): for mod in modules[software]['versions']: cluster = modules[software]['versions'][mod]['clusters'][0] - base_path = path_mapping[cluster] + "software/" + mod + base_path = paths[cluster][0][-12] + "software/" + mod path = base_path + "/lib/python*/site-packages/*" site_packages = glob(path) if site_packages != []: From cf643c7cf5e7f3a2b727ae08e643cc432d81a68f Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 14:44:12 +0100 Subject: [PATCH 05/18] remove hardcoded mapping --- scripts/available_software/available_software.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py index 2be0dca5c673..73f5266104b5 100644 --- a/scripts/available_software/available_software.py +++ b/scripts/available_software/available_software.py @@ -290,7 +290,7 @@ def get_site_packages_ugent(json_data, paths) -> dict: for software, details in modules.items(): for mod in modules[software]['versions']: cluster = modules[software]['versions'][mod]['clusters'][0] - base_path = paths[cluster][0][-12] + "software/" + mod + base_path = paths[cluster][0][:-12] + "software/" + mod path = base_path + "/lib/python*/site-packages/*" site_packages = glob(path) if site_packages != []: From 14e182c0930e2bf42103cce17d5e2d7c843247eb Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 14:55:46 +0100 Subject: [PATCH 06/18] resolve CI errors --- scripts/available_software/available_software.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py index 73f5266104b5..e73c142d9fc5 100644 --- a/scripts/available_software/available_software.py +++ b/scripts/available_software/available_software.py @@ -34,7 +34,6 @@ import re import subprocess import time -import math from glob import glob from pathlib import Path from typing import Union, Tuple @@ -295,7 +294,7 @@ def get_site_packages_ugent(json_data, paths) -> dict: site_packages = glob(path) if site_packages != []: site_packages = [os.path.basename(x) for x in site_packages] - site_packages = [s for s in site_packages if not "." in s] + site_packages = [s for s in site_packages if "." not in s] json_data["software"][software]["versions"][mod]["site_packages"] = site_packages return json_data @@ -436,7 +435,7 @@ def generate_software_detail_page( md_file.new_paragraph("This is a list of site-packages included in the module:") packages = "" for i, package in enumerate(details['site_packages']): - if i != len(details['site_packages']) -1: + if i != len(details['site_packages']) - 1: packages += f"{package}, " else: packages += f"{package}" From 6b7a73c061bf0b30357f0762669461c194f1673b Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 15:01:14 +0100 Subject: [PATCH 07/18] filter packages --- scripts/available_software/available_software.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py index e73c142d9fc5..b891f055ffa8 100644 --- a/scripts/available_software/available_software.py +++ b/scripts/available_software/available_software.py @@ -295,6 +295,7 @@ def get_site_packages_ugent(json_data, paths) -> dict: if site_packages != []: site_packages = [os.path.basename(x) for x in site_packages] site_packages = [s for s in site_packages if "." not in s] + site_packages = [s for s in site_packages if "__" not in s] json_data["software"][software]["versions"][mod]["site_packages"] = site_packages return json_data From 05d3ac4c9c44baf29d4ae9b260565342c130c8cf Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 15:22:32 +0100 Subject: [PATCH 08/18] update tests for updated modules_ugent() funtion --- scripts/available_software/tests/test_data.py | 2 +- scripts/available_software/tests/test_json.py | 6 +++--- scripts/available_software/tests/test_md.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/available_software/tests/test_data.py b/scripts/available_software/tests/test_data.py index 57a1b3b5073f..ff358c010fad 100644 --- a/scripts/available_software/tests/test_data.py +++ b/scripts/available_software/tests/test_data.py @@ -20,7 +20,7 @@ def setup_class(cls): # --------------------------- def test_data_ugent(self): - sol = modules_ugent() + sol = modules_ugent()[0] assert len(sol) == 2 assert len(sol["dialga"]) == 13 assert len(sol["pikachu"]) == 15 diff --git a/scripts/available_software/tests/test_json.py b/scripts/available_software/tests/test_json.py index dd84b92856ce..4654dfc2624c 100644 --- a/scripts/available_software/tests/test_json.py +++ b/scripts/available_software/tests/test_json.py @@ -32,7 +32,7 @@ def teardown_class(cls): # --------------------------- def test_json_generate_simple(self): - modules = modules_ugent() + modules = modules_ugent()[0] json_data = generate_json_overview_data(modules) assert len(json_data.keys()) == 3 assert list(json_data["clusters"]) == ["dialga", "pikachu"] @@ -44,7 +44,7 @@ def test_json_generate_simple(self): } def test_json_simple(self): - modules = modules_ugent() + modules = modules_ugent()[0] json_path = generate_json_overview(modules, ".") with open(json_path) as json_data: data_generated = json.load(json_data) @@ -57,7 +57,7 @@ def test_json_simple(self): assert data_generated["clusters"] == data_solution["clusters"] def test_json_detail_simple(self): - modules = modules_ugent() + modules = modules_ugent()[0] json_path = generate_json_detailed(modules, ".") assert os.path.exists("json_data_detail.json") diff --git a/scripts/available_software/tests/test_md.py b/scripts/available_software/tests/test_md.py index d4b757a3fddc..7d45f68ddb67 100644 --- a/scripts/available_software/tests/test_md.py +++ b/scripts/available_software/tests/test_md.py @@ -28,7 +28,7 @@ def teardown_class(cls): # --------------------------- def test_table_generate_simple(self): - simple_data = get_unique_software_names(modules_ugent()) + simple_data = get_unique_software_names(modules_ugent()[0]) table_data, col, row = generate_table_data(simple_data) assert col == 3 assert row == 5 @@ -36,7 +36,7 @@ def test_table_generate_simple(self): def test_md_simple(self): md_file = MdUtils(file_name='test_simple', title='Overview Modules') - simple_data = get_unique_software_names(modules_ugent()) + simple_data = get_unique_software_names(modules_ugent()[0]) generate_module_table(simple_data, md_file) md_file.create_md_file() assert os.path.exists("test_simple.md") From fcc261346df775029bd12b2d82df56e1787b7afb Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 15:31:46 +0100 Subject: [PATCH 09/18] update tests for updated generate_json_detailed() funtion --- scripts/available_software/tests/test_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/available_software/tests/test_json.py b/scripts/available_software/tests/test_json.py index 4654dfc2624c..10da798cee83 100644 --- a/scripts/available_software/tests/test_json.py +++ b/scripts/available_software/tests/test_json.py @@ -57,8 +57,8 @@ def test_json_simple(self): assert data_generated["clusters"] == data_solution["clusters"] def test_json_detail_simple(self): - modules = modules_ugent()[0] - json_path = generate_json_detailed(modules, ".") + with open(self.path + "/data/test_json_simple_sol.json") as json_data: + json_path = generate_json_detailed(json_data, ".") assert os.path.exists("json_data_detail.json") with open(json_path) as json_data: From 8f349b09e6e8ae1001f060d173776fc4853befc1 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 16:43:54 +0100 Subject: [PATCH 10/18] update test for new json_data_detail.json structure --- scripts/available_software/test.sh | 2 +- .../tests/data/test_json_simple_sol_detail.json | 2 +- scripts/available_software/tests/test_json.py | 8 +++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/available_software/test.sh b/scripts/available_software/test.sh index 85fd00b1a50f..33e8376a8153 100755 --- a/scripts/available_software/test.sh +++ b/scripts/available_software/test.sh @@ -1,3 +1,3 @@ #!/bin/bash -PYTHONPATH=$PWD:$PYTHONPATH pytest -v -s \ No newline at end of file +PYTHONPATH=$PWD:$PYTHONPATH pytest -v -s diff --git a/scripts/available_software/tests/data/test_json_simple_sol_detail.json b/scripts/available_software/tests/data/test_json_simple_sol_detail.json index 64bc33891323..8ff9aa66455b 100644 --- a/scripts/available_software/tests/data/test_json_simple_sol_detail.json +++ b/scripts/available_software/tests/data/test_json_simple_sol_detail.json @@ -1 +1 @@ -{"clusters": ["dialga", "pikachu"], "software": {"cfd": {"clusters": ["dialga", "pikachu"], "versions": {"cfd/1.0": ["dialga", "pikachu"], "cfd/2.0": ["dialga", "pikachu"], "cfd/24": ["dialga", "pikachu"], "cfd/5.0": ["dialga", "pikachu"], "cfd/2.0afqsdf": ["dialga", "pikachu"], "cfd/3.0": ["pikachu"]}}, "Markov": {"clusters": ["dialga"], "versions": {"Markov/hidden-1.0.5": ["dialga"], "Markov/hidden-1.0.10": ["dialga"]}}, "science": {"clusters": ["dialga", "pikachu"], "versions": {"science/5.3.0": ["dialga", "pikachu"], "science/7.2.0": ["dialga", "pikachu"]}}, "llm": {"clusters": ["pikachu"], "versions": {"llm/20230627": ["pikachu"]}}}, "time_generated": "Thu, 31 Aug 2023 at 14:00:22 CEST"} \ No newline at end of file +{"clusters": ["dialga", "pikachu"], "software": {"cfd": {"clusters": ["dialga", "pikachu"], "versions": {"cfd/1.0": {"clusters": ["dialga", "pikachu"]}, "cfd/2.0": {"clusters": ["dialga", "pikachu"]}, "cfd/24": {"clusters": ["dialga", "pikachu"]}, "cfd/5.0": {"clusters": ["dialga", "pikachu"]}, "cfd/2.0afqsdf": {"clusters": ["dialga", "pikachu"]}, "cfd/3.0": {"clusters": ["pikachu"]}}}, "Markov": {"clusters": ["dialga"], "versions": {"Markov/hidden-1.0.5": {"clusters": ["dialga"]}, "Markov/hidden-1.0.10": {"clusters": ["dialga"]}}}, "science": {"clusters": ["dialga", "pikachu"], "versions": {"science/5.3.0": {"clusters": ["dialga", "pikachu"]}, "science/7.2.0": {"clusters": ["dialga", "pikachu"]}}}, "llm": {"clusters": ["pikachu"], "versions": {"llm/20230627": {"clusters": ["pikachu"]}}}}, "time_generated": "Thu, 31 Aug 2023 at 14:00:22 CEST"} diff --git a/scripts/available_software/tests/test_json.py b/scripts/available_software/tests/test_json.py index 10da798cee83..e732585cf5a8 100644 --- a/scripts/available_software/tests/test_json.py +++ b/scripts/available_software/tests/test_json.py @@ -1,7 +1,8 @@ from available_software import (generate_json_overview_data, generate_json_overview, modules_ugent, - generate_json_detailed) + generate_json_detailed, + generate_json_detailed_data) import os import json @@ -57,8 +58,9 @@ def test_json_simple(self): assert data_generated["clusters"] == data_solution["clusters"] def test_json_detail_simple(self): - with open(self.path + "/data/test_json_simple_sol.json") as json_data: - json_path = generate_json_detailed(json_data, ".") + modules = modules_ugent()[0] + json_data = generate_json_detailed_data(modules) + json_path = generate_json_detailed(json_data, ".") assert os.path.exists("json_data_detail.json") with open(json_path) as json_data: From 9ab00847568f0c678d11dde237b06af202b6a411 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 16:45:29 +0100 Subject: [PATCH 11/18] fix test.sh --- scripts/available_software/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/available_software/test.sh b/scripts/available_software/test.sh index 33e8376a8153..5f0c47682344 100755 --- a/scripts/available_software/test.sh +++ b/scripts/available_software/test.sh @@ -1,3 +1,3 @@ #!/bin/bash -PYTHONPATH=$PWD:$PYTHONPATH pytest -v -s +PYTHONPATH=$PWD:$PYTHONPATH pytest -V -s From 8cccba7f05d010096943aae09fa307d27e2a0b56 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 16:46:15 +0100 Subject: [PATCH 12/18] fix test.sh --- scripts/available_software/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/available_software/test.sh b/scripts/available_software/test.sh index 5f0c47682344..33e8376a8153 100755 --- a/scripts/available_software/test.sh +++ b/scripts/available_software/test.sh @@ -1,3 +1,3 @@ #!/bin/bash -PYTHONPATH=$PWD:$PYTHONPATH pytest -V -s +PYTHONPATH=$PWD:$PYTHONPATH pytest -v -s From 3d1feb01f8894b925475b64e24ec0dab134d13d6 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Wed, 6 Mar 2024 16:52:43 +0100 Subject: [PATCH 13/18] fix trailing whitespace --- scripts/available_software/tests/test_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/available_software/tests/test_json.py b/scripts/available_software/tests/test_json.py index e732585cf5a8..80df5c2cc401 100644 --- a/scripts/available_software/tests/test_json.py +++ b/scripts/available_software/tests/test_json.py @@ -1,7 +1,7 @@ from available_software import (generate_json_overview_data, generate_json_overview, modules_ugent, - generate_json_detailed, + generate_json_detailed, generate_json_detailed_data) import os import json From 8062df355caa624e931c531d703c90f842f8ebd2 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Thu, 7 Mar 2024 17:55:40 +0100 Subject: [PATCH 14/18] update available_software to get description, homepage and extensions --- .../available_software/available_software.py | 76 +++++++++++++++---- 1 file changed, 62 insertions(+), 14 deletions(-) diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py index b891f055ffa8..469b40cfa55b 100644 --- a/scripts/available_software/available_software.py +++ b/scripts/available_software/available_software.py @@ -77,7 +77,7 @@ def main(): if args.eessi: json_data = json_data else: - json_data = get_site_packages_ugent(json_data, paths) + json_data = get_extra_info_ugent(json_data, paths) print("Generate JSON detailed... ", end="", flush=True) json_path = generate_json_detailed(json_data, path_data_dir) print("Done!") @@ -179,6 +179,35 @@ def module_whatis(name: str) -> dict: return whatis +def module_info(info: str) -> dict: + """ + Function to parse through lua file. + + @param info: String with the contents of the lua file. + """ + whatis = {} + data = np.array(info.split("\n")) + # index of start description to handle multi lined description + i = np.flatnonzero(np.char.startswith(data, "whatis([==[Description"))[0] + if np.char.endswith(data[i], "]==])"): + content = re.sub(pattern=r'whatis\(\[==\[(.*)\]==\]\)', repl='\\1', string=data[i]).strip('"') + else: + description = re.sub(pattern=r'whatis\(\[==\[(.*)', repl='\\1', string=data[i]).strip('"') + while not np.char.endswith(data[i], "]==])"): + i += 1 + description += data[i] + content = re.sub(pattern=r'(.*)\]==\]\)', repl='\\1', string=description).strip('"') + key, value = tuple(content.split(":", maxsplit=1)) + whatis[key.strip()] = value.strip() + + for line in data[np.char.startswith(data, "whatis")]: + if not np.char.startswith(line, "whatis([==[Description"): + content = re.sub(pattern=r'whatis\(\[==\[(.*)\]==\]\)', repl='\\1', string=line).strip('"') + key, value = tuple(content.split(":", maxsplit=1)) + whatis[key.strip()] = value.strip() + return whatis + + # -------------------------------------------------------------------------------------------------------- # Fetch data EESSI # -------------------------------------------------------------------------------------------------------- @@ -279,25 +308,44 @@ def clusters_ugent() -> np.ndarray: return module_avail(name="cluster/", filter_fn=filter_fn_gent_cluster) -def get_site_packages_ugent(json_data, paths) -> dict: +def get_extra_info_ugent(json_data, paths) -> dict: """ - Add a list of site-packages to all python packages + add a list of extentions to all modules with extensions @return: Dictionary with all the modules and their site_packages """ modules = json_data['software'] - - for software, details in modules.items(): + for software in modules: for mod in modules[software]['versions']: + print(mod) cluster = modules[software]['versions'][mod]['clusters'][0] - base_path = paths[cluster][0][:-12] + "software/" + mod - path = base_path + "/lib/python*/site-packages/*" - site_packages = glob(path) - if site_packages != []: - site_packages = [os.path.basename(x) for x in site_packages] - site_packages = [s for s in site_packages if "." not in s] - site_packages = [s for s in site_packages if "__" not in s] - json_data["software"][software]["versions"][mod]["site_packages"] = site_packages - + print(cluster) + if software == "Java": + # Java has a strange naming sceme which causes probplems + continue + if mod in ["imkl/2020.4.304-NVHPC-21.2"]: + base_path = "/apps/gent/RHEL8/cascadelake-volta-ib/modules/all/" + elif mod in ['OpenFold/1.0.1-foss-2022a-CUDA-11.7.0', 'OpenMM/7.7.0-foss-2022a-CUDA-11.7.0', 'PyTorch-Lightning/1.7.7-foss-2022a-CUDA-11.7.0', 'PyTorch/1.12.1-foss-2022a-CUDA-11.7.0', 'Triton/1.1.1-foss-2022a-CUDA-11.7.0']: + base_path = "/apps/gent/RHEL8/cascadelake-ampere-ib/modules/all/" + elif cluster == "donphan": + base_path = "/apps/gent/RHEL8/cascadelake-ib/modules/all/" + elif cluster == "joltik": + base_path = "/apps/gent/RHEL8/cascadelake-volta-ib/modules/all/" + else: + base_path = paths[cluster][0][:-1] + "/" + path = base_path + mod + ".lua" + print(path) + file = open(path, "r") + info = file.read() + if info != "": + whatis = module_info(info) + print(whatis) + #module_swap("cluster/" + cluster) + #whatis = module_whatis(mod) + json_data['software'][software]['description'] = whatis['Description'] + if "Homepage" in whatis.keys(): + json_data['software'][software]['homepage'] = whatis['Homepage'] + if "Extensions" in whatis.keys(): + json_data["software"][software]["versions"][mod]["extensions"] = whatis['Extensions'] return json_data From 7fa036204a51f55ee5ee7c8eae3be38c3a2dfb21 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Thu, 7 Mar 2024 18:00:34 +0100 Subject: [PATCH 15/18] update available_software generated markdown pages --- .../available_software/available_software.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py index 469b40cfa55b..a98aef693dc0 100644 --- a/scripts/available_software/available_software.py +++ b/scripts/available_software/available_software.py @@ -462,6 +462,12 @@ def generate_software_detail_page( filename = f"{path}/{software_name}.md" md_file = MdUtils(file_name=filename, title=f"{software_name}") + description = software_data['description'] + md_file.new_paragraph(f"{description}") + if 'homepage' in software_data.keys(): + homepage = software_data['homepage'] + md_file.new_paragraph(f"{homepage}") + md_file.new_header(level=1, title="Available modules") md_file.new_paragraph(f"The overview below shows which {software_name} installations are available per HPC-UGent " @@ -479,15 +485,10 @@ def generate_software_detail_page( ) for version, details in list(sorted_versions.items())[::-1]: - if 'site_packages' in details: + if 'extensions' in details: md_file.new_paragraph(f"### {version}") - md_file.new_paragraph("This is a list of site-packages included in the module:") - packages = "" - for i, package in enumerate(details['site_packages']): - if i != len(details['site_packages']) - 1: - packages += f"{package}, " - else: - packages += f"{package}" + md_file.new_paragraph("This is a list of extensions included in the module:") + packages = details['extensions'] md_file.new_paragraph(f"{packages}") md_file.create_md_file() From 6683a5128f6006594dbf087b62e7a0f994163105 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Thu, 7 Mar 2024 18:09:59 +0100 Subject: [PATCH 16/18] resolve failing CI --- scripts/available_software/available_software.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py index a98aef693dc0..3fbc5bdd5cef 100644 --- a/scripts/available_software/available_software.py +++ b/scripts/available_software/available_software.py @@ -34,7 +34,6 @@ import re import subprocess import time -from glob import glob from pathlib import Path from typing import Union, Tuple import numpy as np @@ -316,15 +315,17 @@ def get_extra_info_ugent(json_data, paths) -> dict: modules = json_data['software'] for software in modules: for mod in modules[software]['versions']: - print(mod) cluster = modules[software]['versions'][mod]['clusters'][0] - print(cluster) if software == "Java": # Java has a strange naming sceme which causes probplems continue if mod in ["imkl/2020.4.304-NVHPC-21.2"]: base_path = "/apps/gent/RHEL8/cascadelake-volta-ib/modules/all/" - elif mod in ['OpenFold/1.0.1-foss-2022a-CUDA-11.7.0', 'OpenMM/7.7.0-foss-2022a-CUDA-11.7.0', 'PyTorch-Lightning/1.7.7-foss-2022a-CUDA-11.7.0', 'PyTorch/1.12.1-foss-2022a-CUDA-11.7.0', 'Triton/1.1.1-foss-2022a-CUDA-11.7.0']: + elif mod in ['OpenFold/1.0.1-foss-2022a-CUDA-11.7.0', + 'OpenMM/7.7.0-foss-2022a-CUDA-11.7.0', + 'PyTorch-Lightning/1.7.7-foss-2022a-CUDA-11.7.0', + 'PyTorch/1.12.1-foss-2022a-CUDA-11.7.0', + 'Triton/1.1.1-foss-2022a-CUDA-11.7.0']: base_path = "/apps/gent/RHEL8/cascadelake-ampere-ib/modules/all/" elif cluster == "donphan": base_path = "/apps/gent/RHEL8/cascadelake-ib/modules/all/" @@ -333,14 +334,10 @@ def get_extra_info_ugent(json_data, paths) -> dict: else: base_path = paths[cluster][0][:-1] + "/" path = base_path + mod + ".lua" - print(path) file = open(path, "r") info = file.read() if info != "": whatis = module_info(info) - print(whatis) - #module_swap("cluster/" + cluster) - #whatis = module_whatis(mod) json_data['software'][software]['description'] = whatis['Description'] if "Homepage" in whatis.keys(): json_data['software'][software]['homepage'] = whatis['Homepage'] From 276ca21657fbc435658b188973bce9bd7305bfb4 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Thu, 7 Mar 2024 18:28:05 +0100 Subject: [PATCH 17/18] add resolve for missing description for some software --- scripts/available_software/available_software.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py index 3fbc5bdd5cef..209f57e2e7ec 100644 --- a/scripts/available_software/available_software.py +++ b/scripts/available_software/available_software.py @@ -459,8 +459,9 @@ def generate_software_detail_page( filename = f"{path}/{software_name}.md" md_file = MdUtils(file_name=filename, title=f"{software_name}") - description = software_data['description'] - md_file.new_paragraph(f"{description}") + if 'description' in software_data.keys(): + description = software_data['description'] + md_file.new_paragraph(f"{description}") if 'homepage' in software_data.keys(): homepage = software_data['homepage'] md_file.new_paragraph(f"{homepage}") From 37c275d634bee8588ceb5cb694acb4daea7cf0b3 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Thu, 7 Mar 2024 18:30:40 +0100 Subject: [PATCH 18/18] resolve failing CI --- scripts/available_software/available_software.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py index 209f57e2e7ec..9011f8aec6dc 100644 --- a/scripts/available_software/available_software.py +++ b/scripts/available_software/available_software.py @@ -321,10 +321,10 @@ def get_extra_info_ugent(json_data, paths) -> dict: continue if mod in ["imkl/2020.4.304-NVHPC-21.2"]: base_path = "/apps/gent/RHEL8/cascadelake-volta-ib/modules/all/" - elif mod in ['OpenFold/1.0.1-foss-2022a-CUDA-11.7.0', - 'OpenMM/7.7.0-foss-2022a-CUDA-11.7.0', - 'PyTorch-Lightning/1.7.7-foss-2022a-CUDA-11.7.0', - 'PyTorch/1.12.1-foss-2022a-CUDA-11.7.0', + elif mod in ['OpenFold/1.0.1-foss-2022a-CUDA-11.7.0', + 'OpenMM/7.7.0-foss-2022a-CUDA-11.7.0', + 'PyTorch-Lightning/1.7.7-foss-2022a-CUDA-11.7.0', + 'PyTorch/1.12.1-foss-2022a-CUDA-11.7.0', 'Triton/1.1.1-foss-2022a-CUDA-11.7.0']: base_path = "/apps/gent/RHEL8/cascadelake-ampere-ib/modules/all/" elif cluster == "donphan":