@@ -139,7 +139,7 @@ Below is an example of a full DeviceConfig CR that can be used to install the AM
139
139
upgradePolicy :
140
140
# (Optional) If no upgradePolicy is specified for a component but its image is changed, the daemonset will
141
141
# be upgraded according to the defaults: `upgradeStrategy` set to `RollingUpdate` and `maxUnavailable` set to 1.
142
- upgradeStrategy : RollingUpdate, # (Optional) Can be either `RollingUpdate` or `OnDelete`
142
+ upgradeStrategy : "RollingUpdate" # (Optional) Can be either `RollingUpdate` or `OnDelete`
143
143
maxUnavailable : 1 # (Optional) Number of pods that can be unavailable during the upgrade process. 1 is the default value
144
144
## AMD GPU Metrics Exporter Configuration ##
145
145
metricsExporter :
@@ -156,7 +156,7 @@ Below is an example of a full DeviceConfig CR that can be used to install the AM
156
156
upgradePolicy :
157
157
# (Optional) If no upgradePolicy is specified for a component but its image is changed, the daemonset will
158
158
# be upgraded according to the defaults: `upgradeStrategy` set to `RollingUpdate` and `maxUnavailable` set to 1.
159
- upgradeStrategy : RollingUpdate, # (Optional) Can be either `RollingUpdate` or `OnDelete`
159
+ upgradeStrategy : "RollingUpdate" # (Optional) Can be either `RollingUpdate` or `OnDelete`
160
160
maxUnavailable : 1 # (Optional) Number of pods that can be unavailable during the upgrade process. 1 is the default value
161
161
# If specifying a node selector here, the metrics exporter will only be deployed on nodes that match the selector
162
162
# See Item #6 on https://instinct.docs.amd.com/projects/gpu-operator/en/latest/knownlimitations.html for example usage
@@ -224,6 +224,29 @@ Below is an example of a full DeviceConfig CR that can be used to install the AM
224
224
selector :
225
225
feature.node.kubernetes.io/amd-gpu : "true" # You must include this again as this selector will overwrite the global selector
226
226
amd.com/device-test-runner : "true" # Helpful for when you want to disable the test runner on specific nodes
227
+ configManager :
228
+ enable : False # False by Default. Set to True to enable the config manager
229
+ image : "rocm/device-config-manager:v1.3.1" # image for the device-config-manager container
230
+ imagePullPolicy : IfNotPresent # image pull policy for config manager. Accepted values are Always, IfNotPresent, Never
231
+ config : # specify configmap name which stores profile config info
232
+ name : "config-manager-config"
233
+ upgradePolicy :
234
+ # (Optional) If no upgradePolicy is specified for a component but its image is changed, the daemonset will
235
+ # be upgraded according to the defaults: `upgradeStrategy` set to `RollingUpdate` and `maxUnavailable` set to 1.
236
+ upgradeStrategy : "RollingUpdate" # (Optional) Can be either `RollingUpdate` or `OnDelete`
237
+ maxUnavailable : 1 # (Optional) Number of pods that can be unavailable during the upgrade process. 1 is the default value
238
+ # A DCM pod, whether deployed as a standalone pod or through the GPU operator, will have
239
+ # a toleration attached to it. Users can specify additional tolerations if required.
240
+ # key: amd-dcm, value: up, operator: Equal, effect: NoExecute
241
+ # (Optional)
242
+ # Tolerations that allow the DCM pod to be scheduled on nodes with specific taints
243
+ configManagerTolerations :
244
+ - key : "key1"
245
+ operator : "Equal"
246
+ value : "value1"
247
+ effect : "NoExecute"
248
+ selector : # (Optional)
249
+ feature.node.kubernetes.io/amd-gpu : "true" # You can include this if you wish to overwrite the global selector
227
250
selector :
228
251
# Specify the nodes to be managed by this DeviceConfig Custom Resource. This will be applied to all components unless a selector
229
252
# is specified in the component configuration. The node labeller will automatically find nodes with AMD GPUs and apply the label
0 commit comments