Skip to content

Commit a09fc03

Browse files
authored
fix(backend): Guard against panic when MLMD execution publishing fails (#12203)
* Guard against panic when MLMD execution publishing fails This also removes the defer that obscures the original error being returned. Signed-off-by: mprahl <[email protected]> * Increase the CI timeout on deploying KFP With seaweedfs, the image can take a long time to pull. It's better to wait longer than to fail the workflow. Signed-off-by: mprahl <[email protected]> --------- Signed-off-by: mprahl <[email protected]>
1 parent f94714e commit a09fc03

File tree

2 files changed

+21
-8
lines changed

2 files changed

+21
-8
lines changed

.github/resources/scripts/kfp-readiness/wait_for_pods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def print_get_pods():
7777
print(f"An error occurred while running kubectl get pods: {e.stderr}")
7878

7979

80-
def check_pods(calm_time=10, timeout=600, retries_after_ready=5):
80+
def check_pods(calm_time=10, timeout=900, retries_after_ready=5):
8181
start_time = time.time()
8282
stable_count = 0
8383
previous_statuses = {}

backend/src/v2/component/launcher_v2.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,11 @@ func (l *LauncherV2) Execute(ctx context.Context) (err error) {
171171
var outputArtifacts []*metadata.OutputArtifact
172172
status := pb.Execution_FAILED
173173
defer func() {
174+
if execution == nil {
175+
glog.Errorf("Skipping publish since execution is nil. Original err is: %v", err)
176+
return
177+
}
178+
174179
if perr := l.publish(ctx, execution, executorOutput, outputArtifacts, status); perr != nil {
175180
if err != nil {
176181
err = fmt.Errorf("failed to publish execution with error %s after execution failed: %s", perr.Error(), err.Error())
@@ -312,17 +317,25 @@ func (l *LauncherV2) publish(
312317
outputArtifacts []*metadata.OutputArtifact,
313318
status pb.Execution_State,
314319
) (err error) {
315-
defer func() {
316-
if err != nil {
317-
err = fmt.Errorf("failed to publish results to ML Metadata: %w", err)
318-
}
319-
}()
320-
outputParameters := executorOutput.GetParameterValues()
320+
if execution == nil {
321+
return fmt.Errorf("failed to publish results to ML Metadata: execution is nil")
322+
}
323+
324+
var outputParameters map[string]*structpb.Value
325+
if executorOutput != nil {
326+
outputParameters = executorOutput.GetParameterValues()
327+
}
328+
321329
// TODO(Bobgy): upload output artifacts.
322330
// TODO(Bobgy): when adding artifacts, we will need execution.pipeline to be non-nil, because we need
323331
// to publish output artifacts to the context too.
324332
// return l.metadataClient.PublishExecution(ctx, execution, outputParameters, outputArtifacts, pb.Execution_COMPLETE)
325-
return l.clientManager.MetadataClient().PublishExecution(ctx, execution, outputParameters, outputArtifacts, status)
333+
err = l.clientManager.MetadataClient().PublishExecution(ctx, execution, outputParameters, outputArtifacts, status)
334+
if err != nil {
335+
return fmt.Errorf("failed to publish results to ML Metadata: %w", err)
336+
}
337+
338+
return nil
326339
}
327340

328341
// executeV2 handles placeholder substitution for inputs, calls execute to

0 commit comments

Comments
 (0)