fix: dedicated gpu annotation causing webhook failure issue (#356)

Code2Life · web-flow · commit e55e53d957cc · 2025-09-10T09:12:54.000+08:00
diff --git a/internal/gpuallocator/gpuallocator.go b/internal/gpuallocator/gpuallocator.go
@@ -850,6 +850,11 @@ func (s *GpuAllocator) handleGPUCreate(ctx context.Context, gpu *tfv1.GPU) {
 	defer s.storeMutex.Unlock()
 
 	if s.gpuStore[key] != nil {
+		if gpu.Status.GPUModel != "" {
+			if _, exists := GPUCapacityMap[gpu.Status.GPUModel]; !exists {
+				GPUCapacityMap[gpu.Status.GPUModel] = *gpu.Status.Capacity
+			}
+		}
 		syncGPUMetadataAndStatusFromCluster(s.gpuStore[key], gpu)
 		log.V(6).Info("GPU already exists in store", "name", key.Name)
 		return
diff --git a/internal/webhook/v1/tf_parser.go b/internal/webhook/v1/tf_parser.go
@@ -258,6 +258,7 @@ func handleDedicatedGPU(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile)
 	workloadProfile.Spec.Resources.Requests.Vram = resource.Vram
 	workloadProfile.Spec.Resources.Limits.Tflops = resource.Tflops
 	workloadProfile.Spec.Resources.Limits.Vram = resource.Vram
+	workloadProfile.Spec.Qos = tfv1.QoSCritical
 
 	return nil
 }

Original file line number	Diff line number	Diff line change
`@@ -258,6 +258,7 @@ func handleDedicatedGPU(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile)`
`258`	`258`	`workloadProfile.Spec.Resources.Requests.Vram = resource.Vram`
`259`	`259`	`workloadProfile.Spec.Resources.Limits.Tflops = resource.Tflops`
`260`	`260`	`workloadProfile.Spec.Resources.Limits.Vram = resource.Vram`
	`261`	`+ workloadProfile.Spec.Qos = tfv1.QoSCritical`
`261`	`262`
`262`	`263`	`return nil`
`263`	`264`	`}`