···77 name: spindle-job-runner
88 namespace: system
99automountServiceAccountToken: false
1010+imagePullSecrets:
1111+- name: atcr-login
1012---
1113# Note: No Role or RoleBinding created intentionally
1214# Job pods should have no permissions to read Secrets, list Pods, etc.
+8-1
internal/controller/spindleset_controller.go
···5555// +kubebuilder:rbac:groups=loom.j5t.io,resources=spindlesets/finalizers,verbs=update
5656// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete
5757// +kubebuilder:rbac:groups=batch,resources=jobs/status,verbs=get
5858+// +kubebuilder:rbac:groups="",resources=nodes,verbs=list
5859// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch
5960// +kubebuilder:rbac:groups="",resources=pods/log,verbs=get
6061// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
···345346 }
346347 }
347348349349+ // List nodes for profile selection (to validate nodeSelector labels exist)
350350+ var nodeList corev1.NodeList
351351+ if err := r.Client.List(ctx, &nodeList); err != nil {
352352+ return fmt.Errorf("failed to list nodes: %w", err)
353353+ }
354354+348355 // Convert workflow steps to jobbuilder format and create Jobs for each workflow
349356 for _, workflowSpec := range pipelineRun.Workflows {
350357 // Check if Job already exists
···396403 }
397404398405 // Create the Job
399399- job, err := jobbuilder.BuildJob(jobConfig)
406406+ job, err := jobbuilder.BuildJob(jobConfig, &nodeList)
400407 if err != nil {
401408 return fmt.Errorf("failed to build job for workflow %s: %w", workflowSpec.Name, err)
402409 }
+39-8
internal/jobbuilder/job_template.go
···6161 Namespace string
6262}
63636464-// selectResourceProfile selects the first resource profile matching the workflow architecture.
6464+// nodeMatchesSelector returns true if at least one node has all the labels in selector.
6565+func nodeMatchesSelector(nodes *corev1.NodeList, selector map[string]string) bool {
6666+ if nodes == nil {
6767+ return false
6868+ }
6969+ for _, node := range nodes.Items {
7070+ if labelsMatch(node.Labels, selector) {
7171+ return true
7272+ }
7373+ }
7474+ return false
7575+}
// labelsMatch reports whether nodeLabels contains every key-value pair in selector.
//
// Each selector key must be present in nodeLabels with exactly the same value.
// A label that is absent never matches, even when the selector value is the
// empty string: a plain map index would return "" for a missing key and make
// such a selector match nodes that do not carry the label at all.
//
// An empty or nil selector matches any label set, including a nil one.
func labelsMatch(nodeLabels, selector map[string]string) bool {
	for key, want := range selector {
		// Comma-ok lookup distinguishes "label missing" from "label set to empty string".
		got, ok := nodeLabels[key]
		if !ok || got != want {
			return false
		}
	}
	return true
}
8686+8787+// selectResourceProfile selects the first resource profile matching the workflow architecture
8888+// and whose nodeSelector labels all exist on at least one available node.
6589// Returns the profile's resources and nodeSelector, or default values if no match is found.
6666-func selectResourceProfile(profiles []loomv1alpha1.ResourceProfile, architecture string) (corev1.ResourceRequirements, map[string]string) {
9090+func selectResourceProfile(profiles []loomv1alpha1.ResourceProfile, architecture string, nodes *corev1.NodeList) (corev1.ResourceRequirements, map[string]string) {
6791 // Iterate through profiles to find first match
6892 for _, profile := range profiles {
6993 // Check if profile's nodeSelector has the matching architecture
7070- if arch, ok := profile.NodeSelector["kubernetes.io/arch"]; ok && arch == architecture {
9494+ arch, ok := profile.NodeSelector["kubernetes.io/arch"]
9595+ if !ok || arch != architecture {
9696+ continue
9797+ }
9898+9999+ // Check if ALL nodeSelector labels exist on at least one node
100100+ if nodeMatchesSelector(nodes, profile.NodeSelector) {
71101 return profile.Resources, profile.NodeSelector
72102 }
73103 }
741047575- // No profile matched - return defaults
105105+ // No profile matched - return defaults with just architecture selector
76106 return corev1.ResourceRequirements{
77107 Requests: corev1.ResourceList{
78108 corev1.ResourceCPU: resource.MustParse("500m"),
···82112 corev1.ResourceCPU: resource.MustParse("2"),
83113 corev1.ResourceMemory: resource.MustParse("4Gi"),
84114 },
8585- }, nil
115115+ }, map[string]string{"kubernetes.io/arch": architecture}
86116}
8711788118// BuildJob creates a Kubernetes Job specification for running a spindle workflow.
8989-func BuildJob(config WorkflowConfig) (*batchv1.Job, error) {
119119+// The nodes parameter is used to validate that resource profile nodeSelectors can be satisfied.
120120+func BuildJob(config WorkflowConfig, nodes *corev1.NodeList) (*batchv1.Job, error) {
90121 if config.WorkflowName == "" {
91122 return nil, fmt.Errorf("workflow name is required")
92123 }
···106137 return nil, fmt.Errorf("failed to marshal workflow spec: %w", err)
107138 }
108139109109- // Select resource profile based on workflow architecture
110110- resources, profileNodeSelector := selectResourceProfile(config.Template.ResourceProfiles, config.Architecture)
140140+ // Select resource profile based on workflow architecture and available nodes
141141+ resources, profileNodeSelector := selectResourceProfile(config.Template.ResourceProfiles, config.Architecture, nodes)
111142112143 // Build architecture-based node affinity
113144 archAffinity := BuildArchitectureAffinity(config.Architecture)