···30303131// workflowLogStream holds the per-workflow state used while streaming logs from that workflow's pod.
3232type workflowLogStream struct {
3333- scanner *bufio.Scanner
3434- stream io.ReadCloser
3535- pod *corev1.Pod
3333+ scanner *bufio.Scanner // presumably scans lines from stream; its construction is not visible in this excerpt — confirm
3434+ stream io.ReadCloser // log stream returned by the pod's GetLogs request
3535+ pod *corev1.Pod // pod whose "runner" container logs are being read
3636+ podPhase corev1.PodPhase // pod.Status.Phase captured when the stream was created; checked when the scanner ends without a step end event. NOTE(review): no later refresh is visible in this excerpt — confirm
3637}
37383839// KubernetesEngine implements the spindle Engine interface for Kubernetes Jobs.
···278279 PropagationPolicy: &deletePolicy,
279280 }
280281281281- if err := e.client.Delete(ctx, spindleSet, deleteOptions); err != nil {
282282+ // Use a fresh context for cleanup to ensure deletion succeeds even if the
283283+ // original context was canceled (e.g., by errgroup when another workflow completes).
284284+ // This prevents orphaned SpindleSets when running multiple workflows in parallel.
285285+ cleanupCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
286286+ defer cancel()
287287+288288+ if err := e.client.Delete(cleanupCtx, spindleSet, deleteOptions); err != nil {
282289 // Ignore not found errors (SpindleSet may have already been deleted)
283290 if client.IgnoreNotFound(err) != nil {
284291 return fmt.Errorf("failed to delete SpindleSet: %w", err)
···436443 time.Sleep(1 * time.Second)
437444 }
438445439439- logger.Info("Pod is ready, streaming logs", "podName", pod.Name, "phase", pod.Status.Phase)
446446+ // Only use Follow mode for running pods. For completed pods, we need to read
447447+ // existing logs (Follow:true only streams NEW logs after connection).
448448+ shouldFollow := pod.Status.Phase == corev1.PodRunning
449449+ if !shouldFollow {
450450+ logger.Info("Pod already completed, reading existing logs", "podName", pod.Name, "phase", pod.Status.Phase)
451451+ } else {
452452+ logger.Info("Pod is running, streaming logs", "podName", pod.Name, "phase", pod.Status.Phase)
453453+ }
440454441455 // Stream logs from the main container (not init containers)
442456 req := clientset.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{
443457 Container: "runner",
444444- Follow: true,
458458+ Follow: shouldFollow,
445459 })
446460447461 logStream, err := req.Stream(ctx)
···456470457471 // Create and store stream
458472 stream = &workflowLogStream{
459459- scanner: scanner,
460460- stream: logStream,
461461- pod: pod,
473473+ scanner: scanner,
474474+ stream: logStream,
475475+ pod: pod,
476476+ podPhase: pod.Status.Phase,
462477 }
463478464479 e.streamMutex.Lock()
···534549 }
535550 }
536551537537- // If we get here, scanner ended without seeing step end event
538538- // This could mean pod terminated early
552552+ // Scanner ended without seeing step end event.
553553+ // Check pod phase to determine if this is an error or expected behavior.
554554+ if stream.podPhase == corev1.PodSucceeded {
555555+ // Pod succeeded but we didn't find the control event - treat as success.
556556+ // This can happen if logs were truncated or runner didn't emit events.
557557+ return nil
558558+ }
559559+560560+ if stream.podPhase == corev1.PodFailed {
561561+ return fmt.Errorf("pod failed before step %d completed", stepID)
562562+ }
563563+564564+ // Pod was running when we started but stream ended unexpectedly
539565 return fmt.Errorf("log stream ended before step %d completed", stepID)
540566}
541567