Skip to content

Commit a7f95d7

Browse files
yansun1996sajmera-pensando
authored andcommitted
Optimize the docs and filename for blacklist function
1 parent 0d32e5d commit a7f95d7

File tree

10 files changed

+39
-13
lines changed

10 files changed

+39
-13
lines changed

api/v1alpha1/deviceconfig_types.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,9 @@ type DriverSpec struct {
9494
// +kubebuilder:default=true
9595
Enable *bool `json:"enable,omitempty"`
9696

97-
// blacklist amdgpu drivers on the host
97+
// blacklist amdgpu drivers on the host. Node reboot is required to apply the blacklist on the worker nodes.
98+
// Not working for OpenShift cluster. OpenShift users please use the Machine Config Operator (MCO) resource to configure amdgpu blacklist.
99+
// Example MCO resource is available at https://instinct.docs.amd.com/projects/gpu-operator/en/latest/installation/openshift-olm.html#create-blacklist-for-installing-out-of-tree-kernel-module
98100
//+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="BlacklistDrivers",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:blacklistDrivers"}
99101
Blacklist *bool `json:"blacklist,omitempty"`
100102

bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ metadata:
3030
}
3131
]
3232
capabilities: Basic Install
33-
createdAt: "2025-03-25T06:19:27Z"
33+
createdAt: "2025-03-26T20:10:59Z"
3434
operatorframework.io/suggested-namespace: openshift-amd-gpu
3535
operators.operatorframework.io/builder: operator-sdk-v1.32.0
3636
operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
@@ -229,7 +229,10 @@ spec:
229229
path: driver.amdgpuInstallerRepoURL
230230
x-descriptors:
231231
- urn:alm:descriptor:com.amd.deviceconfigs:amdgpuInstallerRepoURL
232-
- description: blacklist amdgpu drivers on the host
232+
- description: blacklist amdgpu drivers on the host. Node reboot is required
233+
to apply the blacklist on the worker nodes. Not working for OpenShift cluster.
234+
OpenShift users please use the Machine Config Operator (MCO) resource to
235+
configure amdgpu blacklist. Example MCO resource is available at https://instinct.docs.amd.com/projects/gpu-operator/en/latest/installation/openshift-olm.html#create-blacklist-for-installing-out-of-tree-kernel-module
233236
displayName: BlacklistDrivers
234237
path: driver.blacklist
235238
x-descriptors:

bundle/manifests/amd.com_deviceconfigs.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,10 @@ spec:
342342
installer URL is https://repo.radeon.com/amdgpu-install by default
343343
type: string
344344
blacklist:
345-
description: blacklist amdgpu drivers on the host
345+
description: |-
346+
blacklist amdgpu drivers on the host. Node reboot is required to apply the blacklist on the worker nodes.
347+
Not working for OpenShift cluster. OpenShift users please use the Machine Config Operator (MCO) resource to configure amdgpu blacklist.
348+
Example MCO resource is available at https://instinct.docs.amd.com/projects/gpu-operator/en/latest/installation/openshift-olm.html#create-blacklist-for-installing-out-of-tree-kernel-module
346349
type: boolean
347350
enable:
348351
default: true

config/crd/bases/amd.com_deviceconfigs.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,10 @@ spec:
338338
installer URL is https://repo.radeon.com/amdgpu-install by default
339339
type: string
340340
blacklist:
341-
description: blacklist amdgpu drivers on the host
341+
description: |-
342+
blacklist amdgpu drivers on the host. Node reboot is required to apply the blacklist on the worker nodes.
343+
Not working for OpenShift cluster. OpenShift users please use the Machine Config Operator (MCO) resource to configure amdgpu blacklist.
344+
Example MCO resource is available at https://instinct.docs.amd.com/projects/gpu-operator/en/latest/installation/openshift-olm.html#create-blacklist-for-installing-out-of-tree-kernel-module
342345
type: boolean
343346
enable:
344347
default: true

config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,10 @@ spec:
200200
path: driver.amdgpuInstallerRepoURL
201201
x-descriptors:
202202
- urn:alm:descriptor:com.amd.deviceconfigs:amdgpuInstallerRepoURL
203-
- description: blacklist amdgpu drivers on the host
203+
- description: blacklist amdgpu drivers on the host. Node reboot is required
204+
to apply the blacklist on the worker nodes. Not working for OpenShift cluster.
205+
OpenShift users please use the Machine Config Operator (MCO) resource to
206+
configure amdgpu blacklist. Example MCO resource is available at https://instinct.docs.amd.com/projects/gpu-operator/en/latest/installation/openshift-olm.html#create-blacklist-for-installing-out-of-tree-kernel-module
204207
displayName: BlacklistDrivers
205208
path: driver.blacklist
206209
x-descriptors:

helm-charts-k8s/Chart.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ dependencies:
66
repository: file://./charts/kmm
77
version: v1.0.0
88
digest: sha256:f9a315dd2ce3d515ebf28c8e9a6a82158b493ca2686439ec381487761261b597
9-
generated: "2025-03-25T06:19:17.248998622Z"
9+
generated: "2025-03-26T20:10:45.247725094Z"

helm-charts-k8s/crds/deviceconfig-crd.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,10 @@ spec:
346346
installer URL is https://repo.radeon.com/amdgpu-install by default
347347
type: string
348348
blacklist:
349-
description: blacklist amdgpu drivers on the host
349+
description: |-
350+
blacklist amdgpu drivers on the host. Node reboot is required to apply the blacklist on the worker nodes.
351+
Not working for OpenShift cluster. OpenShift users please use the Machine Config Operator (MCO) resource to configure amdgpu blacklist.
352+
Example MCO resource is available at https://instinct.docs.amd.com/projects/gpu-operator/en/latest/installation/openshift-olm.html#create-blacklist-for-installing-out-of-tree-kernel-module
350353
type: boolean
351354
enable:
352355
default: true

helm-charts-openshift/Chart.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ dependencies:
66
repository: file://./charts/kmm
77
version: v1.0.0
88
digest: sha256:25200c34a5cc846a1275e5bf3fc637b19e909dc68de938189c5278d77d03f5ac
9-
generated: "2025-03-25T06:19:26.060856628Z"
9+
generated: "2025-03-26T20:10:56.781691243Z"

helm-charts-openshift/crds/deviceconfig-crd.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,10 @@ spec:
346346
installer URL is https://repo.radeon.com/amdgpu-install by default
347347
type: string
348348
blacklist:
349-
description: blacklist amdgpu drivers on the host
349+
description: |-
350+
blacklist amdgpu drivers on the host. Node reboot is required to apply the blacklist on the worker nodes.
351+
Not working for OpenShift cluster. OpenShift users please use the Machine Config Operator (MCO) resource to configure amdgpu blacklist.
352+
Example MCO resource is available at https://instinct.docs.amd.com/projects/gpu-operator/en/latest/installation/openshift-olm.html#create-blacklist-for-installing-out-of-tree-kernel-module
350353
type: boolean
351354
enable:
352355
default: true

internal/nodelabeller/nodelabeller.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ const (
5252
defaultNodeLabellerImage = "rocm/k8s-device-plugin:labeller-latest"
5353
defaultUbiNodeLabellerImage = "rocm/k8s-node-labeller:rhubi-latest"
5454
defaultInitContainerImage = "busybox:1.36"
55+
defaultBlacklistFileName = "blacklist-amdgpu.conf"
56+
openShiftBlacklistFileName = "blacklist-amdgpu-by-operator.conf"
5557
)
5658

5759
//go:generate mockgen -source=nodelabeller.go -package=nodelabeller -destination=mock_nodelabeller.go NodeLabeller
@@ -129,15 +131,19 @@ func (nl *nodeLabeller) SetNodeLabellerAsDesired(ds *appsv1.DaemonSet, devConfig
129131
},
130132
}
131133

132-
var initContainerCommand []string
134+
blackListFileName := defaultBlacklistFileName
135+
if nl.isOpenShift {
136+
blackListFileName = openShiftBlacklistFileName
137+
}
133138

139+
var initContainerCommand []string
134140
if devConfig.Spec.Driver.Blacklist != nil && *devConfig.Spec.Driver.Blacklist {
135141
// if users want to apply the blacklist, init container will add the amdgpu to the blacklist
136-
initContainerCommand = []string{"sh", "-c", "echo \"# added by gpu operator \nblacklist amdgpu\" > /host-etc/modprobe.d/blacklist-amdgpu.conf; while [ ! -d /host-sys/class/kfd ] || [ ! -d /host-sys/module/amdgpu/drivers/ ]; do echo \"amdgpu driver is not loaded \"; sleep 2 ;done"}
142+
initContainerCommand = []string{"sh", "-c", fmt.Sprintf("echo \"# added by gpu operator \nblacklist amdgpu\" > /host-etc/modprobe.d/%v; while [ ! -d /host-sys/class/kfd ] || [ ! -d /host-sys/module/amdgpu/drivers/ ]; do echo \"amdgpu driver is not loaded \"; sleep 2 ;done", blackListFileName)}
137143
} else {
138144
// if users disabled the KMM driver, or disabled the blacklist
139145
// init container will remove any hanging amdgpu blacklist entry from the list
140-
initContainerCommand = []string{"sh", "-c", "rm -f /host-etc/modprobe.d/blacklist-amdgpu.conf; while [ ! -d /host-sys/class/kfd ] || [ ! -d /host-sys/module/amdgpu/drivers/ ]; do echo \"amdgpu driver is not loaded \"; sleep 2 ;done"}
146+
initContainerCommand = []string{"sh", "-c", fmt.Sprintf("rm -f /host-etc/modprobe.d/%v; while [ ! -d /host-sys/class/kfd ] || [ ! -d /host-sys/module/amdgpu/drivers/ ]; do echo \"amdgpu driver is not loaded \"; sleep 2 ;done", blackListFileName)}
141147
}
142148

143149
initContainerImage := defaultInitContainerImage

0 commit comments

Comments
 (0)