···3636 podPhase corev1.PodPhase // Track pod phase at stream creation time
3737}
// extendedLogLine extends models.LogLine with an exit code for error reporting.
// The runner binary emits JSON log lines; when a step finishes it includes the
// process exit status so the engine can distinguish success from failure.
type extendedLogLine struct {
	models.LogLine
	// ExitCode is the step's process exit status; zero (success) is omitted
	// from the JSON encoding via omitempty.
	ExitCode int `json:"exit_code,omitempty"`
}
4444+3945// KubernetesEngine implements the spindle Engine interface for Kubernetes Jobs.
4046type KubernetesEngine struct {
4147 client client.Client
···547553 for scanner.Scan() {
548554 line := scanner.Text()
549555550550- // Try to parse as models.LogLine from the runner binary
551551- var logLine models.LogLine
556556+ // Try to parse as extendedLogLine from the runner binary (includes exit_code)
557557+ var logLine extendedLogLine
552558 if err := json.Unmarshal([]byte(line), &logLine); err != nil {
553559 // Not JSON or parse error - skip
554560 continue
···571577 // Use control events from runner for flow control only
572578 // Don't write them - the core spindle engine writes control events
573579 if logLine.StepStatus == models.StepStatusEnd {
574574- // Step is done, return
580580+ // Check exit code before returning success
581581+ if logLine.ExitCode != 0 {
582582+ return fmt.Errorf("step %d failed with exit code %d", stepID, logLine.ExitCode)
583583+ }
575584 return nil
576585 }
577586 // For "start" events, just continue reading
+102-94
internal/jobbuilder/job_template.go
···215215 AutomountServiceAccountToken: &[]bool{false}[0],
216216217217 // Init containers: setup user and buildah storage, install runner binary, then clone repository
218218- InitContainers: []corev1.Container{
219219- // Setup user creates /etc/passwd and /etc/group entries for UID 1000
220220- // This is needed because many tools (like buildah) require a valid passwd entry
221221- // Also configures buildah storage directories and storage.conf
222222- {
223223- Name: "setup-user",
224224- Image: "busybox:latest",
225225- Command: []string{"/bin/sh", "-c"},
226226- Args: []string{`
227227-cat > /etc-override/passwd <<'EOF'
228228-root:x:0:0:root:/root:/bin/bash
229229-nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin
230230-runner:x:1000:1000:runner:/home/runner:/bin/sh
231231-EOF
232232-cat > /etc-override/group <<'EOF'
233233-root:x:0:
234234-nobody:x:65534:
235235-runner:x:1000:
236236-EOF
237237-# subuid/subgid mappings for rootless buildah user namespaces
238238-cat > /etc-override/subuid <<'EOF'
239239-runner:100000:65536
240240-EOF
241241-cat > /etc-override/subgid <<'EOF'
242242-runner:100000:65536
243243-EOF
244244-# Create home directory structure then fix ownership
245245-mkdir -p /home-override/runner/.config/containers
246246-chmod 700 /home-override/runner/.config
247247-chown -R 1000:1000 /home-override/runner
248248-# Configure buildah storage directories (for workflows using buildah images)
249249-mkdir -p /var/lib/containers/storage /var/lib/containers/runroot
250250-cat > /var/lib/containers/storage.conf <<'EOF'
251251-[storage]
252252-driver = "overlay"
253253-runroot = "/var/lib/containers/runroot"
254254-graphroot = "/var/lib/containers/storage"
255255-EOF
256256-chown -R 1000:1000 /var/lib/containers
257257-echo "User and buildah storage setup complete"
258258-`},
259259- SecurityContext: &corev1.SecurityContext{
260260- AllowPrivilegeEscalation: &[]bool{false}[0],
261261- // Note: This init container runs as root to create the passwd/group files
262262- // All subsequent containers run as UID 1000 (non-root)
263263- RunAsNonRoot: &[]bool{false}[0],
264264- RunAsUser: &[]int64{0}[0],
265265- Capabilities: &corev1.Capabilities{
266266- Drop: []corev1.Capability{"ALL"},
267267- // CAP_CHOWN is needed to set ownership of home directory for UID 1000
268268- Add: []corev1.Capability{"CHOWN"},
269269- },
270270- },
271271- VolumeMounts: []corev1.VolumeMount{
272272- {
273273- Name: "etc-override",
274274- MountPath: "/etc-override",
275275- },
276276- {
277277- Name: "home-override",
278278- MountPath: "/home-override",
279279- },
280280- {
281281- Name: "buildah-storage",
282282- MountPath: "/var/lib/containers",
283283- },
284284- },
285285- },
286286- {
287287- Name: "install-runner",
288288- Image: config.LoomImage,
289289- Command: []string{"/loom-runner", "--install", "/runner-bin/loom-runner"},
290290- SecurityContext: &corev1.SecurityContext{
291291- AllowPrivilegeEscalation: &[]bool{false}[0],
292292- RunAsNonRoot: &[]bool{true}[0],
293293- RunAsUser: &[]int64{1000}[0],
294294- ReadOnlyRootFilesystem: &[]bool{true}[0],
295295- Capabilities: &corev1.Capabilities{
296296- Drop: []corev1.Capability{"ALL"},
297297- },
298298- },
299299- VolumeMounts: []corev1.VolumeMount{
300300- {
301301- Name: "runner-binary",
302302- MountPath: "/runner-bin",
303303- },
304304- },
305305- },
306306- buildCloneInitContainer(config),
307307- },
218218+ InitContainers: buildInitContainers(config),
308219309220 // Main container: run loom-runner binary in user's image
310221 // Use shell to prepend /runner-bin to PATH, preserving the image's PATH
···404315 return envFrom
405316}
// buildInitContainers creates the init containers for the pod.
// This function properly handles the optional clone container.
//
// Order matters: setup-user runs first (as root) to prepare /etc overrides,
// the runner home directory, and buildah storage; install-runner then copies
// the runner binary into the shared volume; the optional clone container
// (appended only when config.SkipClone is false) fetches the repository last.
func buildInitContainers(config WorkflowConfig) []corev1.Container {
	initContainers := []corev1.Container{
		// Setup user creates /etc/passwd and /etc/group entries for UID 1000
		// This is needed because many tools (like buildah) require a valid passwd entry
		// Also configures buildah storage directories and storage.conf
		{
			Name:    "setup-user",
			Image:   "busybox:latest",
			Command: []string{"/bin/sh", "-c"},
			// The script writes passwd/group/subuid/subgid into the etc-override
			// volume, prepares the runner home dir, and configures buildah's
			// overlay storage. It is a single shell heredoc-driven script.
			Args: []string{`
cat > /etc-override/passwd <<'EOF'
root:x:0:0:root:/root:/bin/bash
nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin
runner:x:1000:1000:runner:/home/runner:/bin/sh
EOF
cat > /etc-override/group <<'EOF'
root:x:0:
nobody:x:65534:
runner:x:1000:
EOF
# subuid/subgid mappings for rootless buildah user namespaces
cat > /etc-override/subuid <<'EOF'
runner:100000:65536
EOF
cat > /etc-override/subgid <<'EOF'
runner:100000:65536
EOF
# Create home directory structure then fix ownership
mkdir -p /home-override/runner/.config/containers
chmod 700 /home-override/runner/.config
chown -R 1000:1000 /home-override/runner
# Configure buildah storage directories (for workflows using buildah images)
mkdir -p /var/lib/containers/storage /var/lib/containers/runroot
cat > /var/lib/containers/storage.conf <<'EOF'
[storage]
driver = "overlay"
runroot = "/var/lib/containers/runroot"
graphroot = "/var/lib/containers/storage"
EOF
chown -R 1000:1000 /var/lib/containers
echo "User and buildah storage setup complete"
`},
			SecurityContext: &corev1.SecurityContext{
				AllowPrivilegeEscalation: &[]bool{false}[0],
				// Note: This init container runs as root to create the passwd/group files
				// All subsequent containers run as UID 1000 (non-root)
				RunAsNonRoot: &[]bool{false}[0],
				RunAsUser:    &[]int64{0}[0],
				Capabilities: &corev1.Capabilities{
					Drop: []corev1.Capability{"ALL"},
					// CAP_CHOWN is needed to set ownership of home directory for UID 1000
					Add: []corev1.Capability{"CHOWN"},
				},
			},
			VolumeMounts: []corev1.VolumeMount{
				{
					Name:      "etc-override",
					MountPath: "/etc-override",
				},
				{
					Name:      "home-override",
					MountPath: "/home-override",
				},
				{
					Name:      "buildah-storage",
					MountPath: "/var/lib/containers",
				},
			},
		},
		// install-runner copies the loom-runner binary from the loom image
		// into the shared runner-binary volume so the main container can use it.
		// Runs as non-root with a read-only root filesystem.
		{
			Name:    "install-runner",
			Image:   config.LoomImage,
			Command: []string{"/loom-runner", "--install", "/runner-bin/loom-runner"},
			SecurityContext: &corev1.SecurityContext{
				AllowPrivilegeEscalation: &[]bool{false}[0],
				RunAsNonRoot:             &[]bool{true}[0],
				RunAsUser:                &[]int64{1000}[0],
				ReadOnlyRootFilesystem:   &[]bool{true}[0],
				Capabilities: &corev1.Capabilities{
					Drop: []corev1.Capability{"ALL"},
				},
			},
			VolumeMounts: []corev1.VolumeMount{
				{
					Name:      "runner-binary",
					MountPath: "/runner-bin",
				},
			},
		},
	}

	// Only add clone container if cloning is not skipped
	if !config.SkipClone {
		initContainers = append(initContainers, buildCloneInitContainer(config))
	}

	return initContainers
}
418418+407419// buildCloneInitContainer creates the init container for cloning the git repository.
408420// Uses the shared clone command builder from tangled.org/core/spindle/steps.
409421func buildCloneInitContainer(config WorkflowConfig) corev1.Container {
410410- // If clone is skipped, return an empty container (will be filtered out)
411411- if config.SkipClone {
412412- return corev1.Container{}
413413- }
414422415423 // Build the shell script from clone commands
416424 // Add set -e for error handling, safe.directory config to handle ownership mismatch
+61
internal/jobbuilder/job_template_test.go
···265265 }
266266}
267267268268+func TestBuildInitContainers(t *testing.T) {
269269+ tests := []struct {
270270+ name string
271271+ config WorkflowConfig
272272+ wantContainers int
273273+ wantCloneExists bool
274274+ }{
275275+ {
276276+ name: "includes clone container when SkipClone is false",
277277+ config: WorkflowConfig{
278278+ LoomImage: "loom:latest",
279279+ SkipClone: false,
280280+ CloneCommands: []string{"git clone https://example.com/repo"},
281281+ },
282282+ wantContainers: 3, // setup-user, install-runner, clone-repo
283283+ wantCloneExists: true,
284284+ },
285285+ {
286286+ name: "excludes clone container when SkipClone is true",
287287+ config: WorkflowConfig{
288288+ LoomImage: "loom:latest",
289289+ SkipClone: true,
290290+ },
291291+ wantContainers: 2, // setup-user, install-runner
292292+ wantCloneExists: false,
293293+ },
294294+ }
295295+296296+ for _, tt := range tests {
297297+ t.Run(tt.name, func(t *testing.T) {
298298+ containers := buildInitContainers(tt.config)
299299+300300+ if len(containers) != tt.wantContainers {
301301+ t.Errorf("buildInitContainers() = %d containers, want %d", len(containers), tt.wantContainers)
302302+ for i, c := range containers {
303303+ t.Logf(" container[%d]: %s", i, c.Name)
304304+ }
305305+ }
306306+307307+ // Verify all containers have valid names (non-empty)
308308+ for i, c := range containers {
309309+ if c.Name == "" {
310310+ t.Errorf("buildInitContainers() container[%d] has empty name", i)
311311+ }
312312+ }
313313+314314+ // Check for clone container
315315+ hasClone := false
316316+ for _, c := range containers {
317317+ if c.Name == "clone-repo" {
318318+ hasClone = true
319319+ break
320320+ }
321321+ }
322322+ if hasClone != tt.wantCloneExists {
323323+ t.Errorf("buildInitContainers() clone-repo exists = %v, want %v", hasClone, tt.wantCloneExists)
324324+ }
325325+ })
326326+ }
327327+}
328328+268329func TestBuildJob(t *testing.T) {
269330 tests := []struct {
270331 name string