Kubernetes Operator for Tangled Spindles

fix

evan.jarrett.net b0f6bc98 e42b257a

verified
+13 -12
+13 -12
internal/engine/kubernetes_engine.go
··· 355 355 // Read from stream until this step's end event 356 356 if wfLogger != nil { 357 357 logger.Info("Reading logs for step", "stepID", idx) 358 - if err := e.readUntilStepEnd(stream, idx, w, wfLogger); err != nil { 358 + if err := e.readUntilStepEnd(ctx, stream, idx, w, wfLogger); err != nil { 359 359 logger.Error(err, "Failed to read step logs") 360 360 // Clean up stream on error 361 361 e.closeLogStream(wid) ··· 497 497 } 498 498 499 499 // readUntilStepEnd reads from the log stream until the end event for the specified step 500 - func (e *KubernetesEngine) readUntilStepEnd(stream *workflowLogStream, stepID int, workflow *models.Workflow, wfLogger *models.WorkflowLogger) error { 500 + func (e *KubernetesEngine) readUntilStepEnd(ctx context.Context, stream *workflowLogStream, stepID int, workflow *models.Workflow, wfLogger *models.WorkflowLogger) error { 501 501 scanner := stream.scanner 502 502 503 503 for scanner.Scan() { ··· 550 550 } 551 551 552 552 // Scanner ended without seeing step end event. 553 - // Check pod phase to determine if this is an error or expected behavior. 554 - if stream.podPhase == corev1.PodSucceeded { 555 - // Pod succeeded but we didn't find the control event - treat as success. 556 - // This can happen if logs were truncated or runner didn't emit events. 557 - return nil 553 + // Re-check current pod status - it may have completed since we started streaming. 554 + currentPod := &corev1.Pod{} 555 + if err := e.client.Get(ctx, client.ObjectKey{Namespace: stream.pod.Namespace, Name: stream.pod.Name}, currentPod); err == nil { 556 + if currentPod.Status.Phase == corev1.PodSucceeded { 557 + // Pod succeeded - treat as success even without control event 558 + return nil 559 + } 560 + if currentPod.Status.Phase == corev1.PodFailed { 561 + return fmt.Errorf("pod failed before step %d completed", stepID) 562 + } 558 563 } 559 564 560 - if stream.podPhase == corev1.PodFailed { 561 - return fmt.Errorf("pod failed before step %d completed", stepID) 562 - } 563 - 564 - // Pod was running when we started but stream ended unexpectedly 565 + // Pod status unknown or still running but stream ended unexpectedly 565 566 return fmt.Errorf("log stream ended before step %d completed", stepID) 566 567 } 567 568