Kubernetes Operator for Tangled Spindles

update configmap and spindleset crd

evan.jarrett.net e6ff2e1e e61d53ee

verified
+593 -114
+5
.tangled/workflows/workflow-amd64.yaml
··· 1 + when: 2 + - event: ["push"] 3 + tag: ["v*"] 4 + branch : ["*"] 5 + 1 6 engine: kubernetes 2 7 image: golang:1.25-trixie 3 8 architecture: amd64
+2 -1
.tangled/workflows/workflow-arm64.yaml
··· 1 1 when: 2 - - event: ["manual"] 2 + - event: ["push"] 3 3 tag: ["v*"] 4 + branch : ["*"] 4 5 5 6 engine: kubernetes 6 7 image: golang:1.25-trixie
-1
Dockerfile
··· 18 18 COPY loom/cmd/controller/main.go cmd/controller/main.go 19 19 COPY loom/api/ api/ 20 20 COPY loom/internal/ internal/ 21 - COPY loom/pkg/ pkg/ 22 21 23 22 # Build 24 23 # CGO is required for go-sqlite3
+21 -8
api/v1alpha1/spindleset_types.go
··· 137 137 Nixpkgs []string `json:"nixpkgs,omitempty"` 138 138 } 139 139 140 + // ResourceProfile defines a resource configuration for spindle jobs based on node labels. 141 + // Profiles are matched against workflow architecture and applied to job pods. 142 + type ResourceProfile struct { 143 + // NodeSelector defines labels that must match for this profile to be used. 144 + // Must include kubernetes.io/arch to match workflow architecture. 145 + // Additional labels allow differentiation between node types (e.g., node-tier, instance-type). 146 + // +kubebuilder:validation:Required 147 + NodeSelector map[string]string `json:"nodeSelector"` 148 + 149 + // Resources defines the compute resource requirements for jobs using this profile. 150 + // +kubebuilder:validation:Required 151 + Resources corev1.ResourceRequirements `json:"resources"` 152 + } 153 + 140 154 // SpindleTemplate defines the pod template configuration for spindle jobs. 141 155 // This is configured via ConfigMap and used internally by the engine. 142 156 type SpindleTemplate struct { 143 - // Resources defines the compute resource requirements for spindle jobs. 144 - Resources corev1.ResourceRequirements `json:"resources,omitempty"` 145 - 146 - // NodeSelector is a selector which must be true for the pod to fit on a node. 147 - // For MVP, this is not exposed via ConfigMap. 148 - NodeSelector map[string]string `json:"nodeSelector,omitempty"` 157 + // ResourceProfiles is an ordered list of resource configurations based on node labels. 158 + // When creating a job, the first profile matching the workflow's architecture is selected. 159 + // The profile's nodeSelector and resources are applied to the job pod. 160 + // +optional 161 + ResourceProfiles []ResourceProfile `json:"resourceProfiles,omitempty"` 149 162 150 163 // Tolerations allows pods to schedule onto nodes with matching taints. 151 - // For MVP, this is not exposed via ConfigMap. 
164 + // +optional 152 165 Tolerations []corev1.Toleration `json:"tolerations,omitempty"` 153 166 154 167 // Affinity defines scheduling constraints for spindle job pods. 155 - // For MVP, this is not exposed via ConfigMap. 168 + // +optional 156 169 Affinity *corev1.Affinity `json:"affinity,omitempty"` 157 170 } 158 171
+28 -6
api/v1alpha1/zz_generated.deepcopy.go
··· 54 54 } 55 55 56 56 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 57 + func (in *ResourceProfile) DeepCopyInto(out *ResourceProfile) { 58 + *out = *in 59 + if in.NodeSelector != nil { 60 + in, out := &in.NodeSelector, &out.NodeSelector 61 + *out = make(map[string]string, len(*in)) 62 + for key, val := range *in { 63 + (*out)[key] = val 64 + } 65 + } 66 + in.Resources.DeepCopyInto(&out.Resources) 67 + } 68 + 69 + // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceProfile. 70 + func (in *ResourceProfile) DeepCopy() *ResourceProfile { 71 + if in == nil { 72 + return nil 73 + } 74 + out := new(ResourceProfile) 75 + in.DeepCopyInto(out) 76 + return out 77 + } 78 + 79 + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 57 80 func (in *SpindleSet) DeepCopyInto(out *SpindleSet) { 58 81 *out = *in 59 82 out.TypeMeta = in.TypeMeta ··· 165 188 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 166 189 func (in *SpindleTemplate) DeepCopyInto(out *SpindleTemplate) { 167 190 *out = *in 168 - in.Resources.DeepCopyInto(&out.Resources) 169 - if in.NodeSelector != nil { 170 - in, out := &in.NodeSelector, &out.NodeSelector 171 - *out = make(map[string]string, len(*in)) 172 - for key, val := range *in { 173 - (*out)[key] = val 191 + if in.ResourceProfiles != nil { 192 + in, out := &in.ResourceProfiles, &out.ResourceProfiles 193 + *out = make([]ResourceProfile, len(*in)) 194 + for i := range *in { 195 + (*in)[i].DeepCopyInto(&(*out)[i]) 174 196 } 175 197 } 176 198 if in.Tolerations != nil {
+31 -7
cmd/controller/main.go
··· 60 60 61 61 // LoomTemplateConfig holds job template configuration 62 62 type LoomTemplateConfig struct { 63 - Resources ResourceConfig `yaml:"resources"` 63 + ResourceProfiles []ResourceProfileConfig `yaml:"resourceProfiles"` 64 + } 65 + 66 + // ResourceProfileConfig holds a resource profile from ConfigMap 67 + type ResourceProfileConfig struct { 68 + NodeSelector map[string]string `yaml:"nodeSelector"` 69 + Resources ResourceConfig `yaml:"resources"` 64 70 } 65 71 66 72 // ResourceConfig holds resource requirements as strings for parsing ··· 146 152 return reqs, nil 147 153 } 148 154 155 + // convertToResourceProfiles converts ConfigMap profiles to API ResourceProfiles 156 + func convertToResourceProfiles(profiles []ResourceProfileConfig) ([]loomv1alpha1.ResourceProfile, error) { 157 + result := make([]loomv1alpha1.ResourceProfile, 0, len(profiles)) 158 + 159 + for i, p := range profiles { 160 + resources, err := convertToResourceRequirements(p.Resources) 161 + if err != nil { 162 + return nil, fmt.Errorf("failed to convert profile %d resources: %w", i, err) 163 + } 164 + 165 + result = append(result, loomv1alpha1.ResourceProfile{ 166 + NodeSelector: p.NodeSelector, 167 + Resources: resources, 168 + }) 169 + } 170 + 171 + return result, nil 172 + } 173 + 149 174 // initializeSpindle creates a spindle server with KubernetesEngine 150 175 func initializeSpindle(ctx context.Context, cfg *config.Config, mgr ctrl.Manager, loomCfg *LoomConfig) (*spindle.Spindle, error) { 151 176 // Initialize Kubernetes engine ··· 155 180 namespace = "default" 156 181 } 157 182 158 - // Convert resource config to Kubernetes types 159 - resources, err := convertToResourceRequirements(loomCfg.Template.Resources) 183 + // Convert resource profiles to Kubernetes types 184 + profiles, err := convertToResourceProfiles(loomCfg.Template.ResourceProfiles) 160 185 if err != nil { 161 - return nil, fmt.Errorf("failed to convert resource requirements: %w", err) 186 + return nil, 
fmt.Errorf("failed to convert resource profiles: %w", err) 162 187 } 163 188 164 189 // Create template from loom config 165 190 template := loomv1alpha1.SpindleTemplate{ 166 - Resources: resources, 191 + ResourceProfiles: profiles, 167 192 } 168 193 169 194 kubeEngine := engine.NewKubernetesEngine(mgr.GetClient(), mgr.GetConfig(), namespace, template) ··· 317 342 } 318 343 setupLog.Info("Loom configuration loaded", 319 344 "maxConcurrentJobs", loomCfg.MaxConcurrentJobs, 320 - "cpuRequest", loomCfg.Template.Resources.Requests.CPU, 321 - "memoryRequest", loomCfg.Template.Resources.Requests.Memory) 345 + "resourceProfiles", len(loomCfg.Template.ResourceProfiles)) 322 346 323 347 // Load spindle configuration from environment 324 348 spindleCfg, err := config.Load(ctx)
+82 -67
config/crd/bases/loom.j5t.io_spindlesets.yaml
··· 198 198 Set internally by the engine from ConfigMap configuration. 199 199 properties: 200 200 affinity: 201 - description: |- 202 - Affinity defines scheduling constraints for spindle job pods. 203 - For MVP, this is not exposed via ConfigMap. 201 + description: Affinity defines scheduling constraints for spindle 202 + job pods. 204 203 properties: 205 204 nodeAffinity: 206 205 description: Describes node affinity scheduling rules for ··· 1118 1117 x-kubernetes-list-type: atomic 1119 1118 type: object 1120 1119 type: object 1121 - nodeSelector: 1122 - additionalProperties: 1123 - type: string 1120 + resourceProfiles: 1124 1121 description: |- 1125 - NodeSelector is a selector which must be true for the pod to fit on a node. 1126 - For MVP, this is not exposed via ConfigMap. 1127 - type: object 1128 - resources: 1129 - description: Resources defines the compute resource requirements 1130 - for spindle jobs. 1131 - properties: 1132 - claims: 1133 - description: |- 1134 - Claims lists the names of resources, defined in spec.resourceClaims, 1135 - that are used by this container. 1122 + ResourceProfiles is an ordered list of resource configurations based on node labels. 1123 + When creating a job, the first profile matching the workflow's architecture is selected. 1124 + The profile's nodeSelector and resources are applied to the job pod. 1125 + items: 1126 + description: |- 1127 + ResourceProfile defines a resource configuration for spindle jobs based on node labels. 1128 + Profiles are matched against workflow architecture and applied to job pods. 1129 + properties: 1130 + nodeSelector: 1131 + additionalProperties: 1132 + type: string 1133 + description: |- 1134 + NodeSelector defines labels that must match for this profile to be used. 1135 + Must include kubernetes.io/arch to match workflow architecture. 1136 + Additional labels allow differentiation between node types (e.g., node-tier, instance-type). 
1137 + type: object 1138 + resources: 1139 + description: Resources defines the compute resource requirements 1140 + for jobs using this profile. 1141 + properties: 1142 + claims: 1143 + description: |- 1144 + Claims lists the names of resources, defined in spec.resourceClaims, 1145 + that are used by this container. 1136 1146 1137 - This is an alpha field and requires enabling the 1138 - DynamicResourceAllocation feature gate. 1147 + This is an alpha field and requires enabling the 1148 + DynamicResourceAllocation feature gate. 1139 1149 1140 - This field is immutable. It can only be set for containers. 1141 - items: 1142 - description: ResourceClaim references one entry in PodSpec.ResourceClaims. 1143 - properties: 1144 - name: 1150 + This field is immutable. It can only be set for containers. 1151 + items: 1152 + description: ResourceClaim references one entry in 1153 + PodSpec.ResourceClaims. 1154 + properties: 1155 + name: 1156 + description: |- 1157 + Name must match the name of one entry in pod.spec.resourceClaims of 1158 + the Pod where this field is used. It makes that resource available 1159 + inside a container. 1160 + type: string 1161 + request: 1162 + description: |- 1163 + Request is the name chosen for a request in the referenced claim. 1164 + If empty, everything from the claim is made available, otherwise 1165 + only the result of this request. 1166 + type: string 1167 + required: 1168 + - name 1169 + type: object 1170 + type: array 1171 + x-kubernetes-list-map-keys: 1172 + - name 1173 + x-kubernetes-list-type: map 1174 + limits: 1175 + additionalProperties: 1176 + anyOf: 1177 + - type: integer 1178 + - type: string 1179 + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ 1180 + x-kubernetes-int-or-string: true 1145 1181 description: |- 1146 - Name must match the name of one entry in pod.spec.resourceClaims of 1147 - the Pod where this field is used. 
It makes that resource available 1148 - inside a container. 1149 - type: string 1150 - request: 1182 + Limits describes the maximum amount of compute resources allowed. 1183 + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ 1184 + type: object 1185 + requests: 1186 + additionalProperties: 1187 + anyOf: 1188 + - type: integer 1189 + - type: string 1190 + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ 1191 + x-kubernetes-int-or-string: true 1151 1192 description: |- 1152 - Request is the name chosen for a request in the referenced claim. 1153 - If empty, everything from the claim is made available, otherwise 1154 - only the result of this request. 1155 - type: string 1156 - required: 1157 - - name 1193 + Requests describes the minimum amount of compute resources required. 1194 + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, 1195 + otherwise to an implementation-defined value. Requests cannot exceed Limits. 1196 + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ 1197 + type: object 1158 1198 type: object 1159 - type: array 1160 - x-kubernetes-list-map-keys: 1161 - - name 1162 - x-kubernetes-list-type: map 1163 - limits: 1164 - additionalProperties: 1165 - anyOf: 1166 - - type: integer 1167 - - type: string 1168 - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ 1169 - x-kubernetes-int-or-string: true 1170 - description: |- 1171 - Limits describes the maximum amount of compute resources allowed. 
1172 - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ 1173 - type: object 1174 - requests: 1175 - additionalProperties: 1176 - anyOf: 1177 - - type: integer 1178 - - type: string 1179 - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ 1180 - x-kubernetes-int-or-string: true 1181 - description: |- 1182 - Requests describes the minimum amount of compute resources required. 1183 - If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, 1184 - otherwise to an implementation-defined value. Requests cannot exceed Limits. 1185 - More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ 1186 - type: object 1187 - type: object 1199 + required: 1200 + - nodeSelector 1201 + - resources 1202 + type: object 1203 + type: array 1188 1204 tolerations: 1189 - description: |- 1190 - Tolerations allows pods to schedule onto nodes with matching taints. 1191 - For MVP, this is not exposed via ConfigMap. 1205 + description: Tolerations allows pods to schedule onto nodes with 1206 + matching taints. 1192 1207 items: 1193 1208 description: |- 1194 1209 The pod this Toleration is attached to tolerates any taint that matches
+1 -1
config/manager/kustomization.yaml
··· 8 8 images: 9 9 - name: controller 10 10 newName: atcr.io/evan.jarrett.net/loom 11 - newTag: debug 11 + newTag: latest
+38 -8
config/manager/loom-config.yaml
··· 8 8 # Maximum number of concurrent spindle jobs 9 9 maxConcurrentJobs: 5 10 10 11 - # Default template for spindle job pods 11 + # Template for spindle job pods 12 12 template: 13 - resources: 14 - requests: 15 - cpu: "1" 16 - memory: "1Gi" 17 - limits: 18 - cpu: "3" 19 - memory: "4Gi" 13 + # Resource profiles are matched against workflow architecture and node labels. 14 + # The first profile matching the workflow's architecture is selected. 15 + # Profile's nodeSelector and resources are applied to the job pod. 16 + resourceProfiles: 17 + # ARM large nodes (8-core, 16GB) - labeled with node-tier: large 18 + - nodeSelector: 19 + kubernetes.io/arch: arm64 20 + node-tier: large 21 + resources: 22 + requests: 23 + cpu: "2" 24 + memory: "4Gi" 25 + limits: 26 + cpu: "6" 27 + memory: "12Gi" 28 + 29 + # ARM small nodes (4-core, 8GB) - fallback for arm64 30 + - nodeSelector: 31 + kubernetes.io/arch: arm64 32 + resources: 33 + requests: 34 + cpu: "1" 35 + memory: "2Gi" 36 + limits: 37 + cpu: "3" 38 + memory: "6Gi" 39 + 40 + # AMD64 nodes 41 + - nodeSelector: 42 + kubernetes.io/arch: amd64 43 + resources: 44 + requests: 45 + cpu: "4" 46 + memory: "4Gi" 47 + limits: 48 + cpu: "8" 49 + memory: "8Gi"
+30 -15
internal/jobbuilder/job_template.go
··· 69 69 Namespace string 70 70 } 71 71 72 + // selectResourceProfile selects the first resource profile matching the workflow architecture. 73 + // Returns the profile's resources and nodeSelector, or default values if no match is found. 74 + func selectResourceProfile(profiles []loomv1alpha1.ResourceProfile, architecture string) (corev1.ResourceRequirements, map[string]string) { 75 + // Iterate through profiles to find first match 76 + for _, profile := range profiles { 77 + // Check if profile's nodeSelector has the matching architecture 78 + if arch, ok := profile.NodeSelector["kubernetes.io/arch"]; ok && arch == architecture { 79 + return profile.Resources, profile.NodeSelector 80 + } 81 + } 82 + 83 + // No profile matched - return defaults 84 + return corev1.ResourceRequirements{ 85 + Requests: corev1.ResourceList{ 86 + corev1.ResourceCPU: resource.MustParse("500m"), 87 + corev1.ResourceMemory: resource.MustParse("1Gi"), 88 + }, 89 + Limits: corev1.ResourceList{ 90 + corev1.ResourceCPU: resource.MustParse("2"), 91 + corev1.ResourceMemory: resource.MustParse("4Gi"), 92 + }, 93 + }, nil 94 + } 95 + 72 96 // BuildJob creates a Kubernetes Job specification for running a spindle workflow. 
73 97 func BuildJob(config WorkflowConfig) (*batchv1.Job, error) { 74 98 if config.WorkflowName == "" { ··· 90 114 return nil, fmt.Errorf("failed to marshal workflow spec: %w", err) 91 115 } 92 116 117 + // Select resource profile based on workflow architecture 118 + resources, profileNodeSelector := selectResourceProfile(config.Template.ResourceProfiles, config.Architecture) 119 + 93 120 // Build architecture-based node affinity 94 121 archAffinity := BuildArchitectureAffinity(config.Architecture) 95 122 96 123 // Merge with user-provided affinity from template 97 124 finalAffinity := MergeAffinity(archAffinity, config.Template.Affinity) 98 125 99 - // Default resources if not specified 100 - resources := config.Template.Resources 101 - if resources.Requests == nil && resources.Limits == nil { 102 - resources = corev1.ResourceRequirements{ 103 - Requests: corev1.ResourceList{ 104 - corev1.ResourceCPU: resource.MustParse("500m"), 105 - corev1.ResourceMemory: resource.MustParse("1Gi"), 106 - }, 107 - Limits: corev1.ResourceList{ 108 - corev1.ResourceCPU: resource.MustParse("2"), 109 - corev1.ResourceMemory: resource.MustParse("4Gi"), 110 - }, 111 - } 112 - } 126 + // Use profile's nodeSelector (includes architecture and additional labels from profile) 127 + finalNodeSelector := profileNodeSelector 113 128 114 129 // Job name: spindle-{pipelineID}-{workflowName} (truncated if needed) 115 130 // Sanitize workflow name: remove file extensions and replace dots with hyphens ··· 238 253 }, 239 254 240 255 // Node targeting 241 - NodeSelector: config.Template.NodeSelector, 256 + NodeSelector: finalNodeSelector, 242 257 Tolerations: config.Template.Tolerations, 243 258 Affinity: finalAffinity, 244 259
+355
internal/jobbuilder/job_template_test.go
··· 1 + package jobbuilder 2 + 3 + import ( 4 + "testing" 5 + 6 + corev1 "k8s.io/api/core/v1" 7 + "k8s.io/apimachinery/pkg/api/resource" 8 + 9 + loomv1alpha1 "tangled.org/evan.jarrett.net/loom/api/v1alpha1" 10 + ) 11 + 12 + func TestSelectResourceProfile(t *testing.T) { 13 + tests := []struct { 14 + name string 15 + profiles []loomv1alpha1.ResourceProfile 16 + architecture string 17 + wantCPU string 18 + wantMemory string 19 + wantLabels map[string]string 20 + }{ 21 + { 22 + name: "select arm64 profile", 23 + profiles: []loomv1alpha1.ResourceProfile{ 24 + { 25 + NodeSelector: map[string]string{ 26 + "kubernetes.io/arch": "arm64", 27 + }, 28 + Resources: corev1.ResourceRequirements{ 29 + Requests: corev1.ResourceList{ 30 + corev1.ResourceCPU: resource.MustParse("1"), 31 + corev1.ResourceMemory: resource.MustParse("2Gi"), 32 + }, 33 + Limits: corev1.ResourceList{ 34 + corev1.ResourceCPU: resource.MustParse("2"), 35 + corev1.ResourceMemory: resource.MustParse("4Gi"), 36 + }, 37 + }, 38 + }, 39 + { 40 + NodeSelector: map[string]string{ 41 + "kubernetes.io/arch": "amd64", 42 + }, 43 + Resources: corev1.ResourceRequirements{ 44 + Requests: corev1.ResourceList{ 45 + corev1.ResourceCPU: resource.MustParse("4"), 46 + corev1.ResourceMemory: resource.MustParse("8Gi"), 47 + }, 48 + Limits: corev1.ResourceList{ 49 + corev1.ResourceCPU: resource.MustParse("16"), 50 + corev1.ResourceMemory: resource.MustParse("32Gi"), 51 + }, 52 + }, 53 + }, 54 + }, 55 + architecture: "arm64", 56 + wantCPU: "1", 57 + wantMemory: "2Gi", 58 + wantLabels: map[string]string{ 59 + "kubernetes.io/arch": "arm64", 60 + }, 61 + }, 62 + { 63 + name: "select amd64 profile", 64 + profiles: []loomv1alpha1.ResourceProfile{ 65 + { 66 + NodeSelector: map[string]string{ 67 + "kubernetes.io/arch": "arm64", 68 + }, 69 + Resources: corev1.ResourceRequirements{ 70 + Requests: corev1.ResourceList{ 71 + corev1.ResourceCPU: resource.MustParse("1"), 72 + corev1.ResourceMemory: resource.MustParse("2Gi"), 73 + }, 74 + }, 
75 + }, 76 + { 77 + NodeSelector: map[string]string{ 78 + "kubernetes.io/arch": "amd64", 79 + }, 80 + Resources: corev1.ResourceRequirements{ 81 + Requests: corev1.ResourceList{ 82 + corev1.ResourceCPU: resource.MustParse("4"), 83 + corev1.ResourceMemory: resource.MustParse("8Gi"), 84 + }, 85 + }, 86 + }, 87 + }, 88 + architecture: "amd64", 89 + wantCPU: "4", 90 + wantMemory: "8Gi", 91 + wantLabels: map[string]string{ 92 + "kubernetes.io/arch": "amd64", 93 + }, 94 + }, 95 + { 96 + name: "select first matching profile with additional labels", 97 + profiles: []loomv1alpha1.ResourceProfile{ 98 + { 99 + NodeSelector: map[string]string{ 100 + "kubernetes.io/arch": "arm64", 101 + "node-tier": "large", 102 + }, 103 + Resources: corev1.ResourceRequirements{ 104 + Requests: corev1.ResourceList{ 105 + corev1.ResourceCPU: resource.MustParse("4"), 106 + corev1.ResourceMemory: resource.MustParse("8Gi"), 107 + }, 108 + }, 109 + }, 110 + { 111 + NodeSelector: map[string]string{ 112 + "kubernetes.io/arch": "arm64", 113 + }, 114 + Resources: corev1.ResourceRequirements{ 115 + Requests: corev1.ResourceList{ 116 + corev1.ResourceCPU: resource.MustParse("1"), 117 + corev1.ResourceMemory: resource.MustParse("2Gi"), 118 + }, 119 + }, 120 + }, 121 + }, 122 + architecture: "arm64", 123 + wantCPU: "4", 124 + wantMemory: "8Gi", 125 + wantLabels: map[string]string{ 126 + "kubernetes.io/arch": "arm64", 127 + "node-tier": "large", 128 + }, 129 + }, 130 + { 131 + name: "fallback to defaults when no profile matches", 132 + profiles: []loomv1alpha1.ResourceProfile{ 133 + { 134 + NodeSelector: map[string]string{ 135 + "kubernetes.io/arch": "amd64", 136 + }, 137 + Resources: corev1.ResourceRequirements{ 138 + Requests: corev1.ResourceList{ 139 + corev1.ResourceCPU: resource.MustParse("4"), 140 + corev1.ResourceMemory: resource.MustParse("8Gi"), 141 + }, 142 + }, 143 + }, 144 + }, 145 + architecture: "arm64", 146 + wantCPU: "500m", 147 + wantMemory: "1Gi", 148 + wantLabels: nil, 149 + }, 150 + { 151 
+ name: "fallback to defaults when no profiles configured", 152 + profiles: []loomv1alpha1.ResourceProfile{}, 153 + architecture: "amd64", 154 + wantCPU: "500m", 155 + wantMemory: "1Gi", 156 + wantLabels: nil, 157 + }, 158 + } 159 + 160 + for _, tt := range tests { 161 + t.Run(tt.name, func(t *testing.T) { 162 + gotResources, gotLabels := selectResourceProfile(tt.profiles, tt.architecture) 163 + 164 + // Check CPU request 165 + gotCPU := gotResources.Requests[corev1.ResourceCPU] 166 + wantCPU := resource.MustParse(tt.wantCPU) 167 + if !gotCPU.Equal(wantCPU) { 168 + t.Errorf("selectResourceProfile() CPU = %v, want %v", gotCPU.String(), wantCPU.String()) 169 + } 170 + 171 + // Check memory request 172 + gotMemory := gotResources.Requests[corev1.ResourceMemory] 173 + wantMemory := resource.MustParse(tt.wantMemory) 174 + if !gotMemory.Equal(wantMemory) { 175 + t.Errorf("selectResourceProfile() Memory = %v, want %v", gotMemory.String(), wantMemory.String()) 176 + } 177 + 178 + // Check labels 179 + if len(gotLabels) != len(tt.wantLabels) { 180 + t.Errorf("selectResourceProfile() labels count = %d, want %d", len(gotLabels), len(tt.wantLabels)) 181 + } 182 + for k, wantV := range tt.wantLabels { 183 + if gotV, ok := gotLabels[k]; !ok || gotV != wantV { 184 + t.Errorf("selectResourceProfile() label[%s] = %v, want %v", k, gotV, wantV) 185 + } 186 + } 187 + }) 188 + } 189 + } 190 + 191 + func TestBuildJob(t *testing.T) { 192 + tests := []struct { 193 + name string 194 + config WorkflowConfig 195 + wantCPU string 196 + wantMemory string 197 + wantNodeSelector map[string]string 198 + wantErr bool 199 + }{ 200 + { 201 + name: "use arm64 profile with additional labels", 202 + config: WorkflowConfig{ 203 + WorkflowName: "test-workflow", 204 + PipelineID: "test-pipeline", 205 + SpindleSetName: "test-spindleset", 206 + Image: "test:latest", 207 + Architecture: "arm64", 208 + WorkflowSpec: loomv1alpha1.WorkflowSpec{Name: "test"}, 209 + Namespace: "default", 210 + Template: 
loomv1alpha1.SpindleTemplate{ 211 + ResourceProfiles: []loomv1alpha1.ResourceProfile{ 212 + { 213 + NodeSelector: map[string]string{ 214 + "kubernetes.io/arch": "arm64", 215 + "node-tier": "large", 216 + }, 217 + Resources: corev1.ResourceRequirements{ 218 + Requests: corev1.ResourceList{ 219 + corev1.ResourceCPU: resource.MustParse("2"), 220 + corev1.ResourceMemory: resource.MustParse("4Gi"), 221 + }, 222 + }, 223 + }, 224 + }, 225 + }, 226 + }, 227 + wantCPU: "2", 228 + wantMemory: "4Gi", 229 + wantNodeSelector: map[string]string{ 230 + "kubernetes.io/arch": "arm64", 231 + "node-tier": "large", 232 + }, 233 + wantErr: false, 234 + }, 235 + { 236 + name: "use amd64 profile", 237 + config: WorkflowConfig{ 238 + WorkflowName: "test-workflow", 239 + PipelineID: "test-pipeline", 240 + SpindleSetName: "test-spindleset", 241 + Image: "test:latest", 242 + Architecture: "amd64", 243 + WorkflowSpec: loomv1alpha1.WorkflowSpec{Name: "test"}, 244 + Namespace: "default", 245 + Template: loomv1alpha1.SpindleTemplate{ 246 + ResourceProfiles: []loomv1alpha1.ResourceProfile{ 247 + { 248 + NodeSelector: map[string]string{ 249 + "kubernetes.io/arch": "arm64", 250 + }, 251 + Resources: corev1.ResourceRequirements{ 252 + Requests: corev1.ResourceList{ 253 + corev1.ResourceCPU: resource.MustParse("1"), 254 + corev1.ResourceMemory: resource.MustParse("2Gi"), 255 + }, 256 + }, 257 + }, 258 + { 259 + NodeSelector: map[string]string{ 260 + "kubernetes.io/arch": "amd64", 261 + }, 262 + Resources: corev1.ResourceRequirements{ 263 + Requests: corev1.ResourceList{ 264 + corev1.ResourceCPU: resource.MustParse("4"), 265 + corev1.ResourceMemory: resource.MustParse("8Gi"), 266 + }, 267 + }, 268 + }, 269 + }, 270 + }, 271 + }, 272 + wantCPU: "4", 273 + wantMemory: "8Gi", 274 + wantNodeSelector: map[string]string{ 275 + "kubernetes.io/arch": "amd64", 276 + }, 277 + wantErr: false, 278 + }, 279 + { 280 + name: "profile with multiple labels", 281 + config: WorkflowConfig{ 282 + WorkflowName: 
"test-workflow", 283 + PipelineID: "test-pipeline", 284 + SpindleSetName: "test-spindleset", 285 + Image: "test:latest", 286 + Architecture: "arm64", 287 + WorkflowSpec: loomv1alpha1.WorkflowSpec{Name: "test"}, 288 + Namespace: "default", 289 + Template: loomv1alpha1.SpindleTemplate{ 290 + ResourceProfiles: []loomv1alpha1.ResourceProfile{ 291 + { 292 + NodeSelector: map[string]string{ 293 + "kubernetes.io/arch": "arm64", 294 + "node-tier": "large", 295 + "custom-label": "custom-value", 296 + }, 297 + Resources: corev1.ResourceRequirements{ 298 + Requests: corev1.ResourceList{ 299 + corev1.ResourceCPU: resource.MustParse("1"), 300 + corev1.ResourceMemory: resource.MustParse("2Gi"), 301 + }, 302 + }, 303 + }, 304 + }, 305 + }, 306 + }, 307 + wantCPU: "1", 308 + wantMemory: "2Gi", 309 + wantNodeSelector: map[string]string{ 310 + "kubernetes.io/arch": "arm64", 311 + "node-tier": "large", 312 + "custom-label": "custom-value", 313 + }, 314 + wantErr: false, 315 + }, 316 + } 317 + 318 + for _, tt := range tests { 319 + t.Run(tt.name, func(t *testing.T) { 320 + job, err := BuildJob(tt.config) 321 + if (err != nil) != tt.wantErr { 322 + t.Errorf("BuildJob() error = %v, wantErr %v", err, tt.wantErr) 323 + return 324 + } 325 + if tt.wantErr { 326 + return 327 + } 328 + 329 + // Check resources 330 + container := job.Spec.Template.Spec.Containers[0] 331 + gotCPU := container.Resources.Requests[corev1.ResourceCPU] 332 + wantCPU := resource.MustParse(tt.wantCPU) 333 + if !gotCPU.Equal(wantCPU) { 334 + t.Errorf("BuildJob() CPU = %v, want %v", gotCPU.String(), wantCPU.String()) 335 + } 336 + 337 + gotMemory := container.Resources.Requests[corev1.ResourceMemory] 338 + wantMemory := resource.MustParse(tt.wantMemory) 339 + if !gotMemory.Equal(wantMemory) { 340 + t.Errorf("BuildJob() Memory = %v, want %v", gotMemory.String(), wantMemory.String()) 341 + } 342 + 343 + // Check nodeSelector 344 + gotNodeSelector := job.Spec.Template.Spec.NodeSelector 345 + if len(gotNodeSelector) != 
len(tt.wantNodeSelector) { 346 + t.Errorf("BuildJob() nodeSelector count = %d, want %d", len(gotNodeSelector), len(tt.wantNodeSelector)) 347 + } 348 + for k, wantV := range tt.wantNodeSelector { 349 + if gotV, ok := gotNodeSelector[k]; !ok || gotV != wantV { 350 + t.Errorf("BuildJob() nodeSelector[%s] = %v, want %v", k, gotV, wantV) 351 + } 352 + } 353 + }) 354 + } 355 + }