Kubernetes Operator for Tangled Spindles

Update with upstream spindle features; fix buildah support.

evan.jarrett.net bf34eb23 da57e777

verified
+95 -25
+8
config/manager/manager.yaml
··· 5 5 control-plane: controller-manager 6 6 app.kubernetes.io/name: loom 7 7 app.kubernetes.io/managed-by: kustomize 8 + # Privileged policy allows Unconfined seccomp for buildah user namespaces. 9 + # The spindle job runner still runs as non-root and drops all capabilities except SETUID/SETGID; privilege escalation is allowed only so newuidmap/newgidmap work. 10 + pod-security.kubernetes.io/enforce: privileged 11 + pod-security.kubernetes.io/audit: privileged 12 + pod-security.kubernetes.io/warn: privileged 8 13 name: system 9 14 --- 10 15 apiVersion: apps/v1 ··· 22 27 control-plane: controller-manager 23 28 app.kubernetes.io/name: loom 24 29 replicas: 1 30 + # Use the Recreate strategy: our ReadWriteOnce (RWO) PVCs attach to one node at a time, so the old pod must stop before its replacement can mount them 31 + strategy: 32 + type: Recreate 25 33 template: 26 34 metadata: 27 35 annotations:
+3 -3
go.mod
··· 2 2 3 3 go 1.24.4 4 4 5 - toolchain go1.24.9 5 + toolchain go1.24.10 6 6 7 7 require ( 8 + github.com/cenkalti/backoff/v4 v4.3.0 8 9 github.com/cyphar/filepath-securejoin v0.4.1 9 10 github.com/onsi/ginkgo/v2 v2.23.4 10 11 github.com/onsi/gomega v1.37.0 ··· 31 32 github.com/carlmjohnson/versioninfo v0.22.5 // indirect 32 33 github.com/casbin/casbin/v2 v2.103.0 // indirect 33 34 github.com/casbin/govaluate v1.3.0 // indirect 34 - github.com/cenkalti/backoff/v4 v4.3.0 // indirect 35 35 github.com/cespare/xxhash/v2 v2.3.0 // indirect 36 36 github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect 37 37 github.com/charmbracelet/lipgloss v1.1.0 // indirect ··· 197 197 ) 198 198 199 199 // Use our custom version of tangled until its upstreamed 200 - replace tangled.org/core => tangled.org/evan.jarrett.net/core v1.11.0-alpha.0.20251124173227-196aa76bafc3 200 + replace tangled.org/core => tangled.org/evan.jarrett.net/core v1.11.0-alpha.0.20251205153606-d61374e9daa6
+2 -2
go.sum
··· 687 687 sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= 688 688 sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= 689 689 sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= 690 - tangled.org/evan.jarrett.net/core v1.11.0-alpha.0.20251124173227-196aa76bafc3 h1:uoAq/kDgUByxxG0kxjIRoBbcIzoU119DAI1hsNYIyVY= 691 - tangled.org/evan.jarrett.net/core v1.11.0-alpha.0.20251124173227-196aa76bafc3/go.mod h1:DpfEc3N9VfsIYVcXwP71zDQpGWnTQ3wBLBxqV0oom8g= 690 + tangled.org/evan.jarrett.net/core v1.11.0-alpha.0.20251205153606-d61374e9daa6 h1:PyvQ+Ptvb34ZNgF/IMi8UpYOdE7GjnXp/bZpLk2E/pU= 691 + tangled.org/evan.jarrett.net/core v1.11.0-alpha.0.20251205153606-d61374e9daa6/go.mod h1:DpfEc3N9VfsIYVcXwP71zDQpGWnTQ3wBLBxqV0oom8g=
+9 -1
internal/engine/kubernetes_engine.go
··· 94 94 // InitWorkflow parses the workflow YAML and initializes a Workflow model. 95 95 // Pipeline environment variables (TANGLED_*) are injected into workflow.Environment 96 96 // by the framework after this method returns. 97 - func (e *KubernetesEngine) InitWorkflow(twf tangled.Pipeline_Workflow, cloneStep models.CloneStep) (*models.Workflow, error) { 97 + func (e *KubernetesEngine) InitWorkflow(twf tangled.Pipeline_Workflow, tpl tangled.Pipeline) (*models.Workflow, error) { 98 98 // Parse the Raw YAML into the unified WorkflowSpec type 99 99 var spec loomv1alpha1.WorkflowSpec 100 100 if err := yaml.Unmarshal([]byte(twf.Raw), &spec); err != nil { ··· 122 122 StepCommand: stepSpec.Command, 123 123 StepKind: models.StepKindUser, 124 124 }) 125 + } 126 + 127 + // Build clone step (uses upstream models.BuildCloneStep which is self-contained) 128 + var cloneStep models.CloneStep 129 + devMode := false // TODO: Make this configurable 130 + 131 + if twf.Clone == nil || !twf.Clone.Skip { 132 + cloneStep = models.BuildCloneStep(twf, *tpl.TriggerMetadata, devMode) 125 133 } 126 134 127 135 // Store pre-computed workflow data
+73 -19
internal/jobbuilder/job_template.go
··· 194 194 RunAsNonRoot: &[]bool{true}[0], 195 195 RunAsUser: &[]int64{1000}[0], 196 196 FSGroup: &[]int64{1000}[0], 197 - // Note: User namespaces (hostUsers: false) for enhanced buildah rootless 198 - // operation requires Kubernetes 1.33+ and is not yet available in the 199 - // current API version. Buildah will still work in rootless mode without it. 197 + // Unconfined seccomp is required for buildah to create user namespaces 198 + // via unshare(CLONE_NEWUSER). The runner container is otherwise restricted by: 199 + // - RunAsNonRoot, RunAsUser 1000 200 + // - AllowPrivilegeEscalation: true, needed only for newuidmap/newgidmap 201 + // - Capabilities: Drop ALL, then add back only SETUID and SETGID 200 202 SeccompProfile: &corev1.SeccompProfile{ 201 - Type: corev1.SeccompProfileTypeRuntimeDefault, 203 + Type: corev1.SeccompProfileTypeUnconfined, 202 204 }, 203 205 }, 204 206 // Disable ServiceAccount token mounting for security ··· 223 225 nobody:x:65534: 224 226 runner:x:1000: 225 227 EOF 226 - mkdir -p /home-override/runner 228 + # subuid/subgid mappings for rootless buildah user namespaces 229 + cat > /etc-override/subuid <<'EOF' 230 + runner:100000:65536 231 + EOF 232 + cat > /etc-override/subgid <<'EOF' 233 + runner:100000:65536 234 + EOF 235 + # Create home directory structure then fix ownership 236 + mkdir -p /home-override/runner/.config/containers 237 + chmod 700 /home-override/runner/.config 238 + chown -R 1000:1000 /home-override/runner 227 239 echo "User setup complete" 228 240 `}, 229 241 SecurityContext: &corev1.SecurityContext{ ··· 234 246 RunAsUser: &[]int64{0}[0], 235 247 Capabilities: &corev1.Capabilities{ 236 248 Drop: []corev1.Capability{"ALL"}, 249 + // CAP_CHOWN is needed to set ownership of home directory for UID 1000 250 + Add: []corev1.Capability{"CHOWN"}, 237 251 }, 238 252 }, 239 253 VolumeMounts: []corev1.VolumeMount{ ··· 272 286 Image: "quay.io/buildah/stable:latest", 273 287 Command: []string{"/bin/sh", "-c"}, 274 288 Args: []string{` 275 - # Configure buildah for rootless operation 289 + # Configure buildah 
storage - native overlay (kernel 6.12+ supports in user namespaces) 276 290 mkdir -p /var/lib/containers/storage 277 291 cat > /var/lib/containers/storage.conf <<'EOF' 278 292 [storage] 279 293 driver = "overlay" 280 294 runroot = "/var/lib/containers/runroot" 281 295 graphroot = "/var/lib/containers/storage" 282 - 283 - [storage.options] 284 - additionalimagestores = [] 285 - 286 - [storage.options.overlay] 287 - mount_program = "/usr/bin/fuse-overlayfs" 288 - mountopt = "nodev,metacopy=on" 289 296 EOF 290 297 291 298 # Copy buildah binary to shared location ··· 293 300 294 301 echo "Buildah configured successfully" 295 302 `}, 303 + Env: []corev1.EnvVar{ 304 + {Name: "HOME", Value: "/home/runner"}, 305 + }, 296 306 SecurityContext: &corev1.SecurityContext{ 297 307 AllowPrivilegeEscalation: &[]bool{false}[0], 298 308 RunAsNonRoot: &[]bool{true}[0], ··· 314 324 Name: "tmp", 315 325 MountPath: "/tmp", 316 326 }, 327 + // Mount passwd/group/subuid/subgid and home directory so buildah 328 + // sees consistent user identity and can write to ~/.config 329 + { 330 + Name: "etc-override", 331 + MountPath: "/etc/passwd", 332 + SubPath: "passwd", 333 + }, 334 + { 335 + Name: "etc-override", 336 + MountPath: "/etc/group", 337 + SubPath: "group", 338 + }, 339 + { 340 + Name: "etc-override", 341 + MountPath: "/etc/subuid", 342 + SubPath: "subuid", 343 + }, 344 + { 345 + Name: "etc-override", 346 + MountPath: "/etc/subgid", 347 + SubPath: "subgid", 348 + }, 349 + { 350 + Name: "home-override", 351 + MountPath: "/home/runner", 352 + SubPath: "runner", 353 + }, 317 354 }, 318 355 }, 319 356 buildCloneInitContainer(config), ··· 329 366 WorkingDir: "/tangled/workspace", 330 367 331 368 SecurityContext: &corev1.SecurityContext{ 332 - AllowPrivilegeEscalation: &[]bool{false}[0], 369 + // AllowPrivilegeEscalation is required for newuidmap/newgidmap 370 + // to set up user namespace mappings for rootless buildah. 
371 + AllowPrivilegeEscalation: &[]bool{true}[0], 333 372 RunAsNonRoot: &[]bool{true}[0], 334 373 RunAsUser: &[]int64{1000}[0], 335 374 // Note: ReadOnlyRootFilesystem is NOT set for the runner container ··· 337 376 // (e.g., /go/pkg, ~/.cache, /var/tmp) that we can't predict or mount 338 377 Capabilities: &corev1.Capabilities{ 339 378 Drop: []corev1.Capability{"ALL"}, 379 + // SETUID/SETGID are needed for newuidmap/newgidmap file capabilities 380 + // to work when setting up user namespace mappings for buildah 381 + Add: []corev1.Capability{"SETUID", "SETGID"}, 340 382 }, 341 383 }, 342 384 ··· 344 386 345 387 VolumeMounts: buildRunnerVolumeMounts(config), 346 388 347 - Env: append(buildEnvironmentVariables(config), corev1.EnvVar{ 348 - Name: "LOOM_WORKFLOW_SPEC", 349 - Value: string(workflowSpecJSON), 350 - }), 389 + Env: append(buildEnvironmentVariables(config), 390 + corev1.EnvVar{ 391 + Name: "LOOM_WORKFLOW_SPEC", 392 + Value: string(workflowSpecJSON), 393 + }, 394 + ), 351 395 352 396 // Inject repository secrets via envFrom if available 353 397 EnvFrom: buildEnvFromSources(config), ··· 477 521 Name: "buildah-storage", 478 522 MountPath: "/var/lib/containers", 479 523 }, 480 - // Mount passwd/group files created by setup-user init container 524 + // Mount passwd/group/subuid/subgid files created by setup-user init container 481 525 // This ensures UID 1000 is recognized by tools like buildah 482 526 { 483 527 Name: "etc-override", ··· 488 532 Name: "etc-override", 489 533 MountPath: "/etc/group", 490 534 SubPath: "group", 535 + }, 536 + { 537 + Name: "etc-override", 538 + MountPath: "/etc/subuid", 539 + SubPath: "subuid", 540 + }, 541 + { 542 + Name: "etc-override", 543 + MountPath: "/etc/subgid", 544 + SubPath: "subgid", 491 545 }, 492 546 { 493 547 Name: "home-override",