tangled
alpha
login
or
join now
evan.jarrett.net
/
loom
10
fork
atom
Kubernetes Operator for Tangled Spindles
10
fork
atom
overview
issues
pulls
pipelines
unify dockerfile, create a release flow
evan.jarrett.net
3 months ago
ee82083d
bf34eb23
verified
This commit was signed with the committer's
known signature
.
evan.jarrett.net
SSH Key Fingerprint:
SHA256:bznk0uVPp7XFOl67P0uTM1pCjf2A4ojeP/lsUE7uauQ=
2/2
workflow-amd64.yaml
success
10s
workflow-arm64.yaml
success
8m 24s
+703
-69
10 changed files
expand all
collapse all
unified
split
.tangled
workflows
release.yaml
workflow-amd64.yaml
Dockerfile
Makefile
cmd
controller
main.go
runner
Dockerfile
config
manager
manager.yaml
docs
proposals
runtime-interface-upstream.md
internal
controller
spindleset_controller.go
jobbuilder
job_template.go
+32
.tangled/workflows/release.yaml
reviewed
···
1
1
+
# ATCR Release Pipeline for Tangled.org
2
2
+
# Triggers on version tags and builds and pushes the Loom container image using buildah
3
3
+
4
4
+
when:
5
5
+
- event: ["push"]
6
6
+
tag: ["v*"]
7
7
+
8
8
+
engine: kubernetes
9
9
+
image: quay.io/buildah/stable:latest
10
10
+
architecture: amd64
11
11
+
12
12
+
environment:
13
13
+
IMAGE_REGISTRY: atcr.io
14
14
+
15
15
+
steps:
16
16
+
- name: Login to registry
17
17
+
command: |
18
18
+
echo "${APP_PASSWORD}" | buildah login \
19
19
+
-u "${TANGLED_REPO_DID}" \
20
20
+
--password-stdin \
21
21
+
${IMAGE_REGISTRY}
22
22
+
23
23
+
- name: Build and push Loom image
24
24
+
command: |
25
25
+
buildah bud \
26
26
+
--tag ${IMAGE_REGISTRY}/${TANGLED_REPO_DID}/${TANGLED_REPO_NAME}:${TANGLED_REF_NAME} \
27
27
+
--tag ${IMAGE_REGISTRY}/${TANGLED_REPO_DID}/${TANGLED_REPO_NAME}:latest \
28
28
+
--file ./Dockerfile \
29
29
+
.
30
30
+
31
31
+
buildah push ${IMAGE_REGISTRY}/${TANGLED_REPO_DID}/${TANGLED_REPO_NAME}:latest
32
32
+
buildah push ${IMAGE_REGISTRY}/${TANGLED_REPO_DID}/${TANGLED_REPO_NAME}:${TANGLED_REF_NAME}
+11
-7
.tangled/workflows/workflow-amd64.yaml
reviewed
···
7
7
image: golang:1.25-trixie
8
8
architecture: amd64
9
9
10
10
+
environment:
11
11
+
IMAGE_REGISTRY: atcr.io
12
12
+
10
13
steps:
11
11
-
- name: build manager binary
14
14
+
- name: test environment vars
12
15
command: |
13
13
-
make build
16
16
+
printenv
14
17
15
15
-
- name: verify build artifacts
18
18
+
- name: Login to registry
16
19
command: |
17
17
-
ls -lh bin/
20
20
+
echo "${APP_PASSWORD}" | buildah login \
21
21
+
-u "${TANGLED_REPO_DID}" \
22
22
+
--password-stdin \
23
23
+
${IMAGE_REGISTRY}
24
24
+
18
25
19
19
-
- name: hello
20
20
-
command: |
21
21
-
echo "hello"
+23
-20
Dockerfile
reviewed
···
1
1
-
# Build the manager binary
2
2
-
FROM golang:1.24 AS builder
1
1
+
# Build both binaries
2
2
+
FROM --platform=$BUILDPLATFORM golang:1.24 AS builder
3
3
+
3
4
ARG TARGETOS
4
5
ARG TARGETARCH
5
6
6
7
WORKDIR /workspace
7
7
-
# Copy the core module (for local replace directive)
8
8
+
9
9
+
# Copy core dependency (from replace directive in go.mod)
8
10
COPY core/ core/
9
9
-
# Copy the Go Modules manifests
10
10
-
COPY loom/go.mod loom/go.mod
11
11
-
COPY loom/go.sum loom/go.sum
12
12
-
# cache deps before building and copying source so that we don't need to re-download as much
13
13
-
# and so that source changes don't invalidate our downloaded layer
11
11
+
12
12
+
# Copy loom go mod files and download deps
13
13
+
COPY loom/go.mod loom/go.sum loom/
14
14
WORKDIR /workspace/loom
15
15
RUN go mod download
16
16
17
17
-
# Copy the go source
18
18
-
COPY loom/cmd/controller/main.go cmd/controller/main.go
17
17
+
# Copy loom source code
19
18
COPY loom/api/ api/
19
19
+
COPY loom/cmd/ cmd/
20
20
COPY loom/internal/ internal/
21
21
22
22
-
# Build
23
23
-
# CGO is required for go-sqlite3
24
24
-
RUN CGO_ENABLED=1 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/controller/main.go
22
22
+
# Build runner (static, no CGO)
23
23
+
# Use -s -w to strip debug symbols and reduce binary size
24
24
+
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \
25
25
+
go build -a -ldflags='-s -w -extldflags "-static"' \
26
26
+
-o loom-runner ./cmd/runner
25
27
26
26
-
# Use debian-slim for debugging (normally we'd use distroless)
27
27
-
# Refer to https://github.com/GoogleContainerTools/distroless for more details
28
28
-
FROM debian:12-slim
29
29
-
RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/lib/apt/lists/*
30
30
-
WORKDIR /
31
31
-
COPY --from=builder /workspace/loom/manager .
32
32
-
USER 65532:65532
28
28
+
# Build controller (requires CGO for sqlite3)
29
29
+
# Use -s -w to strip debug symbols and reduce binary size
30
30
+
RUN CGO_ENABLED=1 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \
31
31
+
go build -a -ldflags='-s -w' -o manager ./cmd/controller
33
32
33
33
+
# Unified image with both binaries
34
34
+
FROM gcr.io/distroless/base-debian12:nonroot
35
35
+
COPY --from=builder /workspace/loom/manager /manager
36
36
+
COPY --from=builder /workspace/loom/loom-runner /loom-runner
34
37
ENTRYPOINT ["/manager"]
+2
-2
Makefile
reviewed
···
225
225
--tag $(RUNNER_IMG) \
226
226
--tag $(RUNNER_IMG_LATEST) \
227
227
--push \
228
228
-
--file loom/cmd/runner/Dockerfile \
228
228
+
--file Dockerfile.runner \
229
229
.
230
230
231
231
.PHONY: docker-build-runner-local
232
232
docker-build-runner-local: ## Build local runner image (single arch) for testing.
233
233
cd .. && $(CONTAINER_TOOL) build \
234
234
--tag $(RUNNER_IMG_LATEST) \
235
235
-
--file loom/cmd/runner/Dockerfile \
235
235
+
--file Dockerfile.runner \
236
236
.
237
237
238
238
.PHONY: test-registry-auth
+11
-4
cmd/controller/main.go
reviewed
···
397
397
}
398
398
}()
399
399
400
400
+
// Get loom image from environment (used for runner init container)
401
401
+
loomImage := os.Getenv("LOOM_IMAGE")
402
402
+
if loomImage == "" {
403
403
+
loomImage = "atcr.io/evan.jarrett.net/loom:latest" // default fallback
404
404
+
}
405
405
+
400
406
// Setup controller with spindle components
401
407
if err := (&controller.SpindleSetReconciler{
402
402
-
Client: mgr.GetClient(),
403
403
-
Scheme: mgr.GetScheme(),
404
404
-
Config: mgr.GetConfig(),
405
405
-
Spindle: s,
408
408
+
Client: mgr.GetClient(),
409
409
+
Scheme: mgr.GetScheme(),
410
410
+
Config: mgr.GetConfig(),
411
411
+
Spindle: s,
412
412
+
LoomImage: loomImage,
406
413
}).SetupWithManager(mgr); err != nil {
407
414
setupLog.Error(err, "unable to create controller", "controller", "SpindleSet")
408
415
os.Exit(1)
-35
cmd/runner/Dockerfile
reviewed
···
1
1
-
FROM --platform=$BUILDPLATFORM golang:1.25 AS builder
2
2
-
3
3
-
ARG TARGETOS
4
4
-
ARG TARGETARCH
5
5
-
6
6
-
WORKDIR /workspace
7
7
-
8
8
-
# Copy core dependency (from replace directive in go.mod)
9
9
-
COPY core/ core/
10
10
-
11
11
-
# Copy loom go mod files
12
12
-
COPY loom/go.mod loom/go.sum loom/
13
13
-
WORKDIR /workspace/loom
14
14
-
RUN go mod download
15
15
-
16
16
-
# Copy loom source code
17
17
-
COPY loom/api/ api/
18
18
-
COPY loom/cmd/runner/ cmd/runner/
19
19
-
20
20
-
# Build static binary
21
21
-
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \
22
22
-
go build -a -ldflags '-extldflags "-static"' \
23
23
-
-o loom-runner ./cmd/runner
24
24
-
25
25
-
# Use minimal base image
26
26
-
FROM alpine:latest
27
27
-
28
28
-
# Install ca-certificates for HTTPS
29
29
-
RUN apk --no-cache add ca-certificates
30
30
-
31
31
-
# Copy the binary from builder (built in /workspace/loom/)
32
32
-
COPY --from=builder /workspace/loom/loom-runner /loom-runner
33
33
-
34
34
-
# Set entrypoint
35
35
-
ENTRYPOINT ["/loom-runner"]
+2
config/manager/manager.yaml
reviewed
···
74
74
valueFrom:
75
75
fieldRef:
76
76
fieldPath: metadata.namespace
77
77
+
- name: LOOM_IMAGE
78
78
+
value: "atcr.io/evan.jarrett.net/loom:latest"
77
79
- name: SPINDLE_SERVER_HOSTNAME
78
80
value: "loom.jarrett.net"
79
81
- name: SPINDLE_SERVER_OWNER
+612
docs/proposals/runtime-interface-upstream.md
reviewed
···
1
1
+
# Proposal: Runtime Interface for Spindle Engines
2
2
+
3
3
+
**Status:** Draft
4
4
+
**Author:** @evanjarrett
5
5
+
**Date:** 2025-12-09
6
6
+
7
7
+
## Summary
8
8
+
9
9
+
Extract a `Runtime` interface from the nixery engine to enable:
10
10
+
1. A new `engine:docker` that accepts user-specified images
11
11
+
2. Support for multiple container runtimes (Docker, Podman)
12
12
+
3. Downstream implementations (e.g., Kubernetes in Loom)
13
13
+
14
14
+
## Motivation
15
15
+
16
16
+
Currently, the spindle engine architecture tightly couples workflow execution logic with Docker-specific container management in the nixery engine. This creates several limitations:
17
17
+
18
18
+
1. **No user-specified images**: Users must declare Nix dependencies; they can't use pre-built Docker images like `node:20` or `golang:1.22`
19
19
+
20
20
+
2. **Single runtime**: Only the Docker daemon is supported; there is no path to Podman or other OCI runtimes
21
21
+
22
22
+
3. **Downstream friction**: Loom (Kubernetes-based spindle) must reimplement the entire Engine interface rather than reusing workflow parsing and step execution logic
23
23
+
24
24
+
## Proposal
25
25
+
26
26
+
### New Runtime Interface
27
27
+
28
28
+
Create `spindle/models/runtime.go`:
29
29
+
30
30
+
```go
31
31
+
package models
32
32
+
33
33
+
import (
34
34
+
"context"
35
35
+
"io"
36
36
+
)
37
37
+
38
38
+
// Runtime abstracts container/job execution environments.
39
39
+
// Implementations: Docker, Podman, (downstream: Kubernetes)
40
40
+
type Runtime interface {
41
41
+
// Setup creates the execution environment and returns a handle.
42
42
+
// The environment should be ready for Exec calls after Setup returns.
43
43
+
Setup(ctx context.Context, opts SetupOpts) (Handle, error)
44
44
+
45
45
+
// Exec runs a command in the environment.
46
46
+
// For container runtimes, this is typically `exec` into a running container.
47
47
+
// For job-based runtimes (K8s), this may stream logs from a pre-defined job.
48
48
+
Exec(ctx context.Context, h Handle, opts ExecOpts) (*ExecResult, error)
49
49
+
50
50
+
// Destroy tears down the environment and releases resources.
51
51
+
Destroy(ctx context.Context, h Handle) error
52
52
+
}
53
53
+
54
54
+
// SetupOpts configures the execution environment.
55
55
+
type SetupOpts struct {
56
56
+
// Image is the container image to use (e.g., "node:20", "nixery.dev/shell/git")
57
57
+
Image string
58
58
+
59
59
+
// WorkflowID uniquely identifies this workflow run (used for labeling/naming)
60
60
+
WorkflowID WorkflowId
61
61
+
62
62
+
// WorkDir is the working directory inside the container (e.g., "/tangled/workspace")
63
63
+
WorkDir string
64
64
+
65
65
+
// Labels for the container/job (e.g., {"sh.tangled.pipeline/workflow_id": "..."})
66
66
+
Labels map[string]string
67
67
+
68
68
+
// Security options
69
69
+
DropAllCaps bool
70
70
+
AddCaps []string // e.g., ["DAC_OVERRIDE", "CHOWN", "FOWNER", "SETUID", "SETGID"]
71
71
+
72
72
+
// Architecture hint for multi-arch scheduling (used by K8s runtime)
73
73
+
Architecture string
74
74
+
}
75
75
+
76
76
+
// ExecOpts configures a single command execution.
77
77
+
type ExecOpts struct {
78
78
+
// Command to run (e.g., ["bash", "-c", "npm install"])
79
79
+
Command []string
80
80
+
81
81
+
// Environment variables
82
82
+
Env []string
83
83
+
84
84
+
// Output streams (nil = discard)
85
85
+
Stdout io.Writer
86
86
+
Stderr io.Writer
87
87
+
}
88
88
+
89
89
+
// ExecResult contains the outcome of an Exec call.
90
90
+
type ExecResult struct {
91
91
+
ExitCode int
92
92
+
OOMKilled bool
93
93
+
}
94
94
+
95
95
+
// Handle is an opaque reference to an execution environment.
96
96
+
type Handle interface {
97
97
+
// ID returns a unique identifier for this environment (container ID, job name, etc.)
98
98
+
ID() string
99
99
+
}
100
100
+
101
101
+
// RuntimeMode indicates how the runtime executes steps.
102
102
+
type RuntimeMode int
103
103
+
104
104
+
const (
105
105
+
// RuntimeModeExec means steps are executed one at a time via Exec calls.
106
106
+
// Used by Docker/Podman where we exec into a running container.
107
107
+
RuntimeModeExec RuntimeMode = iota
108
108
+
109
109
+
// RuntimeModeBatch means all steps run in a single invocation.
110
110
+
// Used by Kubernetes where a Job runs all steps and the engine streams logs.
111
111
+
// In this mode, Exec() streams logs rather than executing commands.
112
112
+
RuntimeModeBatch
113
113
+
)
114
114
+
115
115
+
// RuntimeInfo provides metadata about a runtime implementation.
116
116
+
type RuntimeInfo interface {
117
117
+
Mode() RuntimeMode
118
118
+
}
119
119
+
```
120
120
+
121
121
+
### Refactored Engine Structure
122
122
+
123
123
+
Engines become thin wrappers focused on image resolution and step generation:
124
124
+
125
125
+
```go
126
126
+
// engines/base/engine.go - shared logic
127
127
+
package base
128
128
+
129
129
+
type Engine struct {
130
130
+
Runtime models.Runtime
131
131
+
Logger *slog.Logger
132
132
+
Handles map[string]models.Handle
133
133
+
mu sync.Mutex
134
134
+
}
135
135
+
136
136
+
func (e *Engine) SetupWorkflow(ctx context.Context, wid models.WorkflowId, wf *models.Workflow) error {
137
137
+
data := wf.Data.(WorkflowData)
138
138
+
139
139
+
handle, err := e.Runtime.Setup(ctx, models.SetupOpts{
140
140
+
Image: data.Image,
141
141
+
WorkflowID: wid,
142
142
+
WorkDir: "/tangled/workspace",
143
143
+
Labels: map[string]string{"sh.tangled.pipeline/workflow_id": wid.String()},
144
144
+
DropAllCaps: true,
145
145
+
AddCaps: []string{"DAC_OVERRIDE", "CHOWN", "FOWNER", "SETUID", "SETGID"},
146
146
+
})
147
147
+
if err != nil {
148
148
+
return err
149
149
+
}
150
150
+
151
151
+
e.mu.Lock()
152
152
+
e.Handles[wid.String()] = handle
153
153
+
e.mu.Unlock()
154
154
+
155
155
+
// Create workspace directories
156
156
+
_, err = e.Runtime.Exec(ctx, handle, models.ExecOpts{
157
157
+
Command: []string{"mkdir", "-p", "/tangled/workspace", "/tangled/home"},
158
158
+
})
159
159
+
return err
160
160
+
}
161
161
+
162
162
+
func (e *Engine) RunStep(ctx context.Context, wid models.WorkflowId, w *models.Workflow, idx int, secrets []secrets.UnlockedSecret, wfLogger *models.WorkflowLogger) error {
163
163
+
e.mu.Lock()
164
164
+
handle := e.Handles[wid.String()]
165
165
+
e.mu.Unlock()
166
166
+
167
167
+
step := w.Steps[idx]
168
168
+
env := buildEnvs(w.Environment, step, secrets)
169
169
+
170
170
+
var stdout, stderr io.Writer
171
171
+
if wfLogger != nil {
172
172
+
stdout = wfLogger.DataWriter(idx, "stdout")
173
173
+
stderr = wfLogger.DataWriter(idx, "stderr")
174
174
+
}
175
175
+
176
176
+
result, err := e.Runtime.Exec(ctx, handle, models.ExecOpts{
177
177
+
Command: []string{"bash", "-c", step.Command()},
178
178
+
Env: env,
179
179
+
Stdout: stdout,
180
180
+
Stderr: stderr,
181
181
+
})
182
182
+
if err != nil {
183
183
+
return err
184
184
+
}
185
185
+
if result.OOMKilled {
186
186
+
return ErrOOMKilled
187
187
+
}
188
188
+
if result.ExitCode != 0 {
189
189
+
return engine.ErrWorkflowFailed
190
190
+
}
191
191
+
return nil
192
192
+
}
193
193
+
194
194
+
func (e *Engine) DestroyWorkflow(ctx context.Context, wid models.WorkflowId) error {
195
195
+
e.mu.Lock()
196
196
+
handle, exists := e.Handles[wid.String()]
197
197
+
delete(e.Handles, wid.String())
198
198
+
e.mu.Unlock()
199
199
+
200
200
+
if !exists {
201
201
+
return nil
202
202
+
}
203
203
+
return e.Runtime.Destroy(ctx, handle)
204
204
+
}
205
205
+
```
206
206
+
207
207
+
### Engine Implementations
208
208
+
209
209
+
**Nixery Engine** (image from dependencies):
210
210
+
211
211
+
```go
212
212
+
// engines/nixery/engine.go
213
213
+
package nixery
214
214
+
215
215
+
type Engine struct {
216
216
+
*base.Engine
217
217
+
cfg *config.Config
218
218
+
}
219
219
+
220
220
+
func (e *Engine) InitWorkflow(twf tangled.Pipeline_Workflow, tpl tangled.Pipeline) (*models.Workflow, error) {
221
221
+
var spec struct {
222
222
+
Steps []StepSpec `yaml:"steps"`
223
223
+
Dependencies map[string][]string `yaml:"dependencies"`
224
224
+
Environment map[string]string `yaml:"environment"`
225
225
+
}
226
226
+
yaml.Unmarshal([]byte(twf.Raw), &spec)
227
227
+
228
228
+
// NIXERY-SPECIFIC: Build image URL from dependencies
229
229
+
image := workflowImage(spec.Dependencies, e.cfg.NixeryPipelines.Nixery)
230
230
+
231
231
+
steps := []models.Step{}
232
232
+
233
233
+
// Add nixery-specific setup steps
234
234
+
steps = append(steps, nixConfStep())
235
235
+
steps = append(steps, models.BuildCloneStep(twf, *tpl.TriggerMetadata, e.cfg.Server.Dev))
236
236
+
if depStep := dependencyStep(spec.Dependencies); depStep != nil {
237
237
+
steps = append(steps, *depStep)
238
238
+
}
239
239
+
240
240
+
// Add user steps
241
241
+
for _, s := range spec.Steps {
242
242
+
steps = append(steps, Step{name: s.Name, command: s.Command, ...})
243
243
+
}
244
244
+
245
245
+
return &models.Workflow{
246
246
+
Name: twf.Name,
247
247
+
Steps: steps,
248
248
+
Environment: spec.Environment,
249
249
+
Data: base.WorkflowData{Image: image},
250
250
+
}, nil
251
251
+
}
252
252
+
253
253
+
func (e *Engine) WorkflowTimeout() time.Duration {
254
254
+
// ... existing config-based timeout
255
255
+
}
256
256
+
```
257
257
+
258
258
+
**Docker Engine** (user-specified image):
259
259
+
260
260
+
```go
261
261
+
// engines/docker/engine.go
262
262
+
package docker
263
263
+
264
264
+
type Engine struct {
265
265
+
*base.Engine
266
266
+
}
267
267
+
268
268
+
func (e *Engine) InitWorkflow(twf tangled.Pipeline_Workflow, tpl tangled.Pipeline) (*models.Workflow, error) {
269
269
+
var spec struct {
270
270
+
Image string `yaml:"image"`
271
271
+
Steps []StepSpec `yaml:"steps"`
272
272
+
Environment map[string]string `yaml:"environment"`
273
273
+
}
274
274
+
yaml.Unmarshal([]byte(twf.Raw), &spec)
275
275
+
276
276
+
// DOCKER-SPECIFIC: Require explicit image
277
277
+
if spec.Image == "" {
278
278
+
return nil, fmt.Errorf("docker engine requires 'image' field in workflow")
279
279
+
}
280
280
+
281
281
+
steps := []models.Step{}
282
282
+
283
283
+
// Add clone step (shared with nixery)
284
284
+
steps = append(steps, models.BuildCloneStep(twf, *tpl.TriggerMetadata, false))
285
285
+
286
286
+
// Add user steps
287
287
+
for _, s := range spec.Steps {
288
288
+
steps = append(steps, SimpleStep{Name: s.Name, Command: s.Command, ...})
289
289
+
}
290
290
+
291
291
+
return &models.Workflow{
292
292
+
Name: twf.Name,
293
293
+
Steps: steps,
294
294
+
Environment: spec.Environment,
295
295
+
Data: base.WorkflowData{Image: spec.Image},
296
296
+
}, nil
297
297
+
}
298
298
+
299
299
+
func (e *Engine) WorkflowTimeout() time.Duration {
300
300
+
return 1 * time.Hour // default
301
301
+
}
302
302
+
```
303
303
+
304
304
+
### Runtime Implementations
305
305
+
306
306
+
**Docker Runtime**:
307
307
+
308
308
+
```go
309
309
+
// runtime/docker/runtime.go
310
310
+
package docker
311
311
+
312
312
+
type Runtime struct {
313
313
+
client client.APIClient
314
314
+
logger *slog.Logger
315
315
+
}
316
316
+
317
317
+
type handle struct {
318
318
+
containerID string
319
319
+
networkID string
320
320
+
}
321
321
+
322
322
+
func (h *handle) ID() string { return h.containerID }
323
323
+
324
324
+
func (r *Runtime) Setup(ctx context.Context, opts models.SetupOpts) (models.Handle, error) {
325
325
+
// Create network
326
326
+
netResp, _ := r.client.NetworkCreate(ctx, networkName(opts.WorkflowID), ...)
327
327
+
328
328
+
// Pull image
329
329
+
reader, _ := r.client.ImagePull(ctx, opts.Image, image.PullOptions{})
330
330
+
io.Copy(io.Discard, reader)
331
331
+
reader.Close()
332
332
+
333
333
+
// Create container
334
334
+
resp, _ := r.client.ContainerCreate(ctx, &container.Config{
335
335
+
Image: opts.Image,
336
336
+
Cmd: []string{"cat"},
337
337
+
OpenStdin: true,
338
338
+
WorkingDir: opts.WorkDir,
339
339
+
Labels: opts.Labels,
340
340
+
}, &container.HostConfig{
341
341
+
CapDrop: capDrop(opts.DropAllCaps),
342
342
+
CapAdd: opts.AddCaps,
343
343
+
// ... mounts, security opts
344
344
+
}, nil, nil, "")
345
345
+
346
346
+
r.client.ContainerStart(ctx, resp.ID, container.StartOptions{})
347
347
+
348
348
+
return &handle{containerID: resp.ID, networkID: netResp.ID}, nil
349
349
+
}
350
350
+
351
351
+
func (r *Runtime) Exec(ctx context.Context, h models.Handle, opts models.ExecOpts) (*models.ExecResult, error) {
352
352
+
dh := h.(*handle)
353
353
+
354
354
+
execResp, _ := r.client.ContainerExecCreate(ctx, dh.containerID, container.ExecOptions{
355
355
+
Cmd: opts.Command,
356
356
+
Env: opts.Env,
357
357
+
AttachStdout: true,
358
358
+
AttachStderr: true,
359
359
+
})
360
360
+
361
361
+
attach, _ := r.client.ContainerExecAttach(ctx, execResp.ID, container.ExecAttachOptions{})
362
362
+
defer attach.Close()
363
363
+
364
364
+
stdcopy.StdCopy(opts.Stdout, opts.Stderr, attach.Reader)
365
365
+
366
366
+
inspect, _ := r.client.ContainerExecInspect(ctx, execResp.ID)
367
367
+
368
368
+
// Check OOMKilled
369
369
+
containerInspect, _ := r.client.ContainerInspect(ctx, dh.containerID)
370
370
+
371
371
+
return &models.ExecResult{
372
372
+
ExitCode: inspect.ExitCode,
373
373
+
OOMKilled: containerInspect.State.OOMKilled,
374
374
+
}, nil
375
375
+
}
376
376
+
377
377
+
func (r *Runtime) Destroy(ctx context.Context, h models.Handle) error {
378
378
+
dh := h.(*handle)
379
379
+
r.client.ContainerStop(ctx, dh.containerID, container.StopOptions{})
380
380
+
r.client.ContainerRemove(ctx, dh.containerID, container.RemoveOptions{RemoveVolumes: true})
381
381
+
r.client.NetworkRemove(ctx, dh.networkID)
382
382
+
return nil
383
383
+
}
384
384
+
```
385
385
+
386
386
+
**Podman Runtime**:
387
387
+
388
388
+
```go
389
389
+
// runtime/podman/runtime.go
390
390
+
package podman
391
391
+
392
392
+
// Podman is API-compatible with Docker for most operations.
393
393
+
// This runtime uses the Podman socket instead of Docker socket.
394
394
+
395
395
+
type Runtime struct {
396
396
+
client *podman.APIClient // or use Docker client with Podman socket
397
397
+
logger *slog.Logger
398
398
+
}
399
399
+
400
400
+
// Implementation nearly identical to Docker runtime.
401
401
+
// Key differences:
402
402
+
// - Socket path: /run/user/1000/podman/podman.sock (rootless) or /run/podman/podman.sock
403
403
+
// - Some API differences in network handling
404
404
+
// - Native support for rootless containers
405
405
+
```
406
406
+
407
407
+
### Kubernetes Runtime (Downstream - Loom)
408
408
+
409
409
+
```go
410
410
+
// In loom repo: runtime/kubernetes/runtime.go
411
411
+
package kubernetes
412
412
+
413
413
+
type Runtime struct {
414
414
+
client client.Client
415
415
+
config *rest.Config
416
416
+
namespace string
417
417
+
template SpindleTemplate
418
418
+
}
419
419
+
420
420
+
func (r *Runtime) Mode() models.RuntimeMode {
421
421
+
return models.RuntimeModeBatch // All steps run in single Job
422
422
+
}
423
423
+
424
424
+
func (r *Runtime) Setup(ctx context.Context, opts models.SetupOpts) (models.Handle, error) {
425
425
+
// Build Job spec with:
426
426
+
// - Init containers for setup (user namespace, clone, etc.)
427
427
+
// - Main container running loom-runner with all steps
428
428
+
// - Node affinity based on opts.Architecture
429
429
+
430
430
+
job := jobbuilder.BuildJob(jobbuilder.WorkflowConfig{
431
431
+
Image: opts.Image,
432
432
+
Architecture: opts.Architecture,
433
433
+
// ... steps passed via ConfigMap
434
434
+
})
435
435
+
436
436
+
r.client.Create(ctx, job)
437
437
+
438
438
+
// Wait for pod to be running
439
439
+
pod := waitForPod(ctx, job)
440
440
+
441
441
+
return &k8sHandle{
442
442
+
jobName: job.Name,
443
443
+
podName: pod.Name,
444
444
+
}, nil
445
445
+
}
446
446
+
447
447
+
func (r *Runtime) Exec(ctx context.Context, h models.Handle, opts models.ExecOpts) (*models.ExecResult, error) {
448
448
+
// In batch mode, Exec streams logs rather than executing commands.
449
449
+
// The loom-runner binary inside the Job executes all steps.
450
450
+
// This method reads log output and returns when the step completes.
451
451
+
452
452
+
kh := h.(*k8sHandle)
453
453
+
454
454
+
// Stream logs from pod, parse JSON log lines from loom-runner
455
455
+
// Return when step end marker is seen
456
456
+
457
457
+
return &models.ExecResult{ExitCode: 0}, nil
458
458
+
}
459
459
+
460
460
+
func (r *Runtime) Destroy(ctx context.Context, h models.Handle) error {
461
461
+
kh := h.(*k8sHandle)
462
462
+
// Delete Job (GC handles pod cleanup)
463
463
+
return r.client.Delete(ctx, &batchv1.Job{ObjectMeta: metav1.ObjectMeta{Name: kh.jobName}})
464
464
+
}
465
465
+
```
466
466
+
467
467
+
### Workflow YAML Examples
468
468
+
469
469
+
**engine:nixery** (current behavior):
470
470
+
```yaml
471
471
+
engine: nixery
472
472
+
dependencies:
473
473
+
nixpkgs:
474
474
+
- nodejs
475
475
+
- python3
476
476
+
steps:
477
477
+
- name: build
478
478
+
command: npm run build
479
479
+
```
480
480
+
481
481
+
**engine:docker** (new):
482
482
+
```yaml
483
483
+
engine: docker
484
484
+
image: node:20-alpine
485
485
+
steps:
486
486
+
- name: install
487
487
+
command: npm ci
488
488
+
- name: build
489
489
+
command: npm run build
490
490
+
- name: test
491
491
+
command: npm test
492
492
+
```
493
493
+
494
494
+
**engine:kubernetes** (downstream, in Loom):
495
495
+
```yaml
496
496
+
engine: kubernetes
497
497
+
image: golang:1.22
498
498
+
architecture: arm64
499
499
+
steps:
500
500
+
- name: build
501
501
+
command: go build ./...
502
502
+
- name: test
503
503
+
command: go test ./...
504
504
+
```
505
505
+
506
506
+
### Server Wiring
507
507
+
508
508
+
```go
509
509
+
// server.go
510
510
+
func Run(ctx context.Context) error {
511
511
+
cfg, err := config.Load(ctx)
512
512
+
513
513
+
// Create runtime based on config
514
514
+
var rt models.Runtime
515
515
+
switch cfg.Runtime.Type {
516
516
+
case "docker":
517
517
+
dockerClient, _ := client.NewClientWithOpts(client.FromEnv)
518
518
+
rt = docker.NewRuntime(dockerClient, logger)
519
519
+
case "podman":
520
520
+
podmanClient, _ := podman.NewClient(cfg.Runtime.Podman.Socket)
521
521
+
rt = podman.NewRuntime(podmanClient, logger)
522
522
+
default:
523
523
+
rt = docker.NewRuntime(...) // default
524
524
+
}
525
525
+
526
526
+
// Create engines with shared runtime
527
527
+
nixeryEng := nixery.New(rt, cfg, logger)
528
528
+
dockerEng := dockerengine.New(rt, logger)
529
529
+
530
530
+
s, _ := New(ctx, cfg, map[string]models.Engine{
531
531
+
"nixery": nixeryEng,
532
532
+
"docker": dockerEng,
533
533
+
})
534
534
+
535
535
+
return s.Start(ctx)
536
536
+
}
537
537
+
```
538
538
+
539
539
+
### Configuration
540
540
+
541
541
+
```toml
542
542
+
# spindle.toml
543
543
+
544
544
+
[runtime]
545
545
+
type = "docker" # or "podman"
546
546
+
547
547
+
[runtime.docker]
548
548
+
# Uses DOCKER_HOST env var by default
549
549
+
550
550
+
[runtime.podman]
551
551
+
socket = "/run/user/1000/podman/podman.sock"
552
552
+
```
553
553
+
554
554
+
## Migration Path
555
555
+
556
556
+
### Phase 1: Extract Runtime Interface
557
557
+
- Add `models/runtime.go` with interface definition
558
558
+
- Add `runtime/docker/` implementation
559
559
+
- Refactor nixery to use docker runtime internally
560
560
+
- No breaking changes to existing workflows
561
561
+
562
562
+
### Phase 2: Add Docker Engine
563
563
+
- Add `engines/docker/` that uses same runtime
564
564
+
- Register as `"docker"` in engine map
565
565
+
- Users can now use `engine: docker` with explicit images
566
566
+
567
567
+
### Phase 3: Add Podman Runtime
568
568
+
- Add `runtime/podman/` implementation
569
569
+
- Add config option to select runtime
570
570
+
- Podman users can run nixery/docker engines without a Docker daemon
571
571
+
572
572
+
### Phase 4: Downstream Kubernetes (Loom)
573
573
+
- Loom implements `runtime/kubernetes/`
574
574
+
- Can register `"kubernetes"` engine or reuse `"docker"`/`"nixery"` engines with K8s runtime
575
575
+
- Maintains current Job + loom-runner architecture
576
576
+
577
577
+
## Alternatives Considered
578
578
+
579
579
+
### 1. Keep engines monolithic
580
580
+
- Pro: Simpler, less abstraction
581
581
+
- Con: Code duplication, can't swap runtimes, harder for downstream
582
582
+
583
583
+
### 2. Docker-in-Docker for Kubernetes
584
584
+
- Pro: Identical behavior to local execution
585
585
+
- Con: Security concerns, complexity, resource overhead
586
586
+
587
587
+
### 3. Runtime as engine parameter
588
588
+
- Pro: More flexible per-workflow
589
589
+
- Con: Overcomplicates workflow YAML; the runtime is a deployment choice, not a user choice
590
590
+
591
591
+
## Open Questions
592
592
+
593
593
+
1. **Should runtime selection be per-engine or global?**
594
594
+
- Proposal: Global (deployment config), not per-workflow
595
595
+
596
596
+
2. **How to handle runtime-specific features?**
597
597
+
- E.g., K8s node affinity, Docker network modes
598
598
+
- Proposal: `SetupOpts` has optional fields; runtimes ignore unsupported options
599
599
+
600
600
+
3. **Should we upstream the Kubernetes runtime?**
601
601
+
- Proposal: No, keep in Loom. Upstream provides interface, downstream implements.
602
602
+
603
603
+
4. **Podman rootless considerations?**
604
604
+
- User namespaces, different capability handling
605
605
+
- Need testing matrix
606
606
+
607
607
+
## References
608
608
+
609
609
+
- Current nixery engine: `spindle/engines/nixery/engine.go` in the core repository
610
610
+
- [Loom KubernetesEngine](../../internal/engine/kubernetes_engine.go)
611
611
+
- [Docker Engine API](https://docs.docker.com/engine/api/)
612
612
+
- [Podman API](https://docs.podman.io/en/latest/_static/api.html)
+5
internal/controller/spindleset_controller.go
reviewed
···
49
49
Config *rest.Config
50
50
Spindle *spindle.Spindle
51
51
52
52
+
// LoomImage is the loom image containing the runner binary
53
53
+
// Set from LOOM_IMAGE environment variable
54
54
+
LoomImage string
55
55
+
52
56
// Track watched Jobs for status reporting
53
57
watchedJobs sync.Map // map[string]models.WorkflowId
54
58
}
···
439
443
PipelineID: pipelineRun.PipelineID,
440
444
SpindleSetName: spindleSet.Name,
441
445
Image: workflowSpec.Image,
446
446
+
LoomImage: r.LoomImage,
442
447
Architecture: workflowSpec.Architecture,
443
448
Steps: jobSteps,
444
449
WorkflowSpec: workflowSpec, // Pass full workflow spec to runner
+5
-1
internal/jobbuilder/job_template.go
reviewed
···
34
34
// Image is the container image to use for execution
35
35
Image string
36
36
37
37
+
// LoomImage is the loom image containing the runner binary
38
38
+
// Used by the install-runner init container
39
39
+
LoomImage string
40
40
+
37
41
// Architecture is the target architecture (amd64, arm64)
38
42
Architecture string
39
43
···
263
267
},
264
268
{
265
269
Name: "install-runner",
266
266
-
Image: "atcr.io/evan.jarrett.net/loom-runner:latest",
270
270
+
Image: config.LoomImage,
267
271
Command: []string{"cp", "/loom-runner", "/runner-bin/loom-runner"},
268
272
SecurityContext: &corev1.SecurityContext{
269
273
AllowPrivilegeEscalation: &[]bool{false}[0],