A container registry that uses the AT Protocol for manifest storage and S3 for blob storage. atcr.io
docker container atproto go

Fix vuln scanner DB not refreshing

evan.jarrett.net 76383ec7 200d8a7b

verified
+126 -46
+7
pkg/appview/handlers/scan_result.go
··· 34 34 ScannedAt string 35 35 Found bool // true if scan record exists 36 36 Error bool // true if hold unreachable or error 37 + ScanFailed bool // true if scan record exists but scan failed (no blobs) 37 38 Digest string // for the detail modal link 38 39 HoldEndpoint string // for the detail modal link 39 40 } ··· 127 128 return 128 129 } 129 130 131 + // A failed scan has nil blobs (no SBOM generated) and zero counts. 132 + // Successful scans always have an SBOM blob even with 0 vulnerabilities. 133 + scanFailed := scanRecord.SbomBlob == nil && scanRecord.Total == 0 134 + 130 135 h.renderBadge(w, vulnBadgeData{ 131 136 Critical: scanRecord.Critical, 132 137 High: scanRecord.High, ··· 135 140 Total: scanRecord.Total, 136 141 ScannedAt: scanRecord.ScannedAt, 137 142 Found: true, 143 + ScanFailed: scanFailed, 138 144 Digest: digest, 139 145 HoldEndpoint: holdDID, 140 146 }) ··· 194 200 Total: scanRecord.Total, 195 201 ScannedAt: scanRecord.ScannedAt, 196 202 Found: true, 203 + ScanFailed: scanRecord.SbomBlob == nil && scanRecord.Total == 0, 197 204 Digest: fullDigest, 198 205 HoldEndpoint: holdDID, 199 206 }
+7
pkg/appview/handlers/scan_result_test.go
··· 38 38 "total": total, 39 39 "scannerVersion": "atcr-scanner-v1.0.0", 40 40 "scannedAt": "2025-01-15T10:30:00Z", 41 + // Successful scans always have an SBOM blob 42 + "sbomBlob": map[string]any{ 43 + "$type": "blob", 44 + "ref": map[string]any{"$link": "bafkreigv3xw47pk7cbeahkmttetf4smxyluwlu3jmteo2nzke2oa7dbhhm"}, 45 + "mimeType": "application/spdx+json", 46 + "size": 1234, 47 + }, 41 48 } 42 49 envelope := map[string]any{ 43 50 "uri": "at://did:web:hold.example.com/io.atcr.hold.scan/abc123",
+2
pkg/appview/templates/partials/vuln-badge.html
··· 1 1 {{ define "vuln-badge" }} 2 2 {{ if .Error }} 3 3 {{/* Silently hide on error / no scan record — scan badges are non-critical */}} 4 + {{ else if .ScanFailed }} 5 + {{/* Scan failed (no SBOM blob) — don't show misleading "Clean" badge */}} 4 6 {{ else if eq .Total 0 }} 5 7 <span class="badge badge-sm badge-success" title="No vulnerabilities found (scanned {{ .ScannedAt }})">{{ icon "shield-check" "size-3" }} Clean</span> 6 8 {{ else }}
+40 -20
pkg/hold/admin/handlers_crew.go
··· 9 9 "time" 10 10 11 11 "atcr.io/pkg/atproto" 12 + "atcr.io/pkg/hold/pds" 12 13 "github.com/go-chi/chi/v5" 13 14 ) 14 15 ··· 27 28 AddedAt time.Time 28 29 } 29 30 30 - // CrewSkeletonView is the minimal crew member data for skeleton rendering. 31 - // Contains only data available from the MST walk (no network calls). 32 - type CrewSkeletonView struct { 33 - RKey string 34 - DID string 35 - Role string 36 - Permissions []string 37 - Tier string 38 - AddedAt time.Time 39 - } 40 - 41 31 // resolveHandle attempts to resolve a DID to a handle 42 32 // Returns empty string if resolution fails 43 33 func resolveHandle(ctx context.Context, did string) string { ··· 61 51 } 62 52 63 53 // handleCrewTab returns the crew tab content (HTMX partial). 64 - // Only does the MST walk — no handle resolution or usage queries. 65 - // Each row lazy-loads its details via handleCrewMemberInfo. 54 + // Includes usage data (fast bulk SQL query) for correct sort order. 55 + // Handles are lazy-loaded per-row via handleCrewMemberInfo. 
66 56 func (ui *AdminUI) handleCrewTab(w http.ResponseWriter, r *http.Request) { 67 57 crew, err := ui.pds.ListCrewMembers(r.Context()) 68 58 if err != nil { ··· 70 60 return 71 61 } 72 62 63 + allQuotas, err := ui.pds.GetAllUserQuotas(r.Context()) 64 + if err != nil { 65 + slog.Warn("Failed to get user quotas for crew tab", "error", err) 66 + allQuotas = make(map[string]*pds.QuotaStats) 67 + } 68 + 73 69 defaultTier := "default" 74 70 if ui.quotaMgr != nil && ui.quotaMgr.IsEnabled() { 75 71 defaultTier = ui.quotaMgr.GetDefaultTier() 76 72 } 77 73 78 - var skeletons []CrewSkeletonView 74 + var crewViews []CrewMemberView 79 75 for _, member := range crew { 80 76 tier := member.Record.Tier 81 77 if tier == "" { 82 78 tier = defaultTier 83 79 } 84 - skeletons = append(skeletons, CrewSkeletonView{ 80 + 81 + view := CrewMemberView{ 85 82 RKey: member.Rkey, 86 83 DID: member.Record.Member, 87 84 Role: member.Record.Role, 88 85 Permissions: member.Record.Permissions, 89 86 Tier: tier, 90 87 AddedAt: parseTime(member.Record.AddedAt), 91 - }) 88 + } 89 + 90 + usage := int64(0) 91 + if q, ok := allQuotas[member.Record.Member]; ok { 92 + usage = q.TotalSize 93 + } 94 + 95 + if ui.quotaMgr != nil && ui.quotaMgr.IsEnabled() { 96 + if limit := ui.quotaMgr.GetTierLimit(tier); limit != nil { 97 + view.TierLimit = formatHumanBytes(*limit) 98 + if *limit > 0 { 99 + view.UsagePercent = int(float64(usage) / float64(*limit) * 100) 100 + } 101 + } else { 102 + view.TierLimit = "Unlimited" 103 + } 104 + } else { 105 + view.TierLimit = "Unlimited" 106 + } 107 + 108 + view.CurrentUsage = usage 109 + view.UsageHuman = formatHumanBytes(usage) 110 + 111 + crewViews = append(crewViews, view) 92 112 } 93 113 94 - sort.Slice(skeletons, func(i, j int) bool { 95 - return skeletons[i].AddedAt.After(skeletons[j].AddedAt) 114 + sort.Slice(crewViews, func(i, j int) bool { 115 + return crewViews[i].CurrentUsage > crewViews[j].CurrentUsage 96 116 }) 97 117 98 118 data := struct { 99 - Crew 
[]CrewSkeletonView 119 + Crew []CrewMemberView 100 120 }{ 101 - Crew: skeletons, 121 + Crew: crewViews, 102 122 } 103 123 ui.renderTemplate(w, "partials/tab_crew.html", data) 104 124 }
+7 -2
pkg/hold/admin/templates/partials/tab_crew.html
··· 55 55 </td> 56 56 <td> 57 57 <span class="badge badge-primary badge-sm">{{.Tier}}</span> 58 + <br><small class="text-base-content/50">{{.TierLimit}}</small> 58 59 </td> 59 - <td class="text-base-content/30 text-sm"> 60 - <span class="loading loading-spinner loading-xs"></span> 60 + <td> 61 + <div class="flex flex-col gap-1 min-w-24"> 62 + <span class="text-sm">{{.UsageHuman}}</span> 63 + <progress class="progress {{if gt .UsagePercent 90}}progress-error{{else if gt .UsagePercent 75}}progress-warning{{else}}progress-primary{{end}} w-full" value="{{.UsagePercent}}" max="100"></progress> 64 + <small class="text-base-content/50">{{.UsagePercent}}%</small> 65 + </div> 61 66 </td> 62 67 <td class="text-sm text-base-content/70">{{formatTime .AddedAt}}</td> 63 68 <td></td>
+4 -3
pkg/hold/pds/scan_broadcaster.go
··· 567 567 "seq", msg.Seq, "error", err) 568 568 } else { 569 569 // Create a scan record with zero counts and nil blobs — marks it as 570 - // "scanned" so the proactive scheduler won't retry until rescan interval 570 + // "scanned" so the proactive scheduler won't retry until rescan interval. 571 + // Nil blobs signal failure to the appview (successful scans always have blobs). 571 572 scanRecord := atproto.NewScanRecord( 572 573 manifestDigest, repository, userDID, 573 - nil, nil, // no SBOM or vuln report 574 + nil, nil, // no SBOM or vuln report — signals scan failure 574 575 0, 0, 0, 0, 0, 575 - "failed: "+truncateError(msg.Error, 200), 576 + "atcr-scanner-v1.0.0", 576 577 ) 577 578 if _, _, err := sb.pds.CreateScanRecord(ctx, scanRecord); err != nil { 578 579 slog.Error("Failed to store failure scan record",
+42 -20
scanner/internal/scan/grype.go
··· 32 32 33 33 // Global vulnerability database (shared across workers) 34 34 var ( 35 - vulnDB vulnerability.Provider 36 - vulnDBLock sync.RWMutex 35 + vulnDB vulnerability.Provider 36 + vulnDBLock sync.RWMutex 37 + vulnDBLoaded time.Time // when the current vulnDB was loaded 37 38 ) 39 + 40 + // vulnDBRefreshAge is how long a cached DB is considered fresh. 41 + // Set 1 day before the 5-day MaxAllowedBuiltAge so we refresh proactively. 42 + const vulnDBRefreshAge = 4 * 24 * time.Hour 38 43 39 44 // scanVulnerabilities scans an SBOM for vulnerabilities using Grype 40 45 func scanVulnerabilities(ctx context.Context, s *sbom.SBOM, vulnDBPath string) ([]byte, string, scanner.VulnerabilitySummary, error) { ··· 119 124 return reportJSON, digest, summary, nil 120 125 } 121 126 122 - // loadVulnDatabase loads the Grype vulnerability database (with caching) 127 + // loadVulnDatabase loads the Grype vulnerability database with caching and 128 + // automatic refresh. The cached DB is returned if loaded less than 129 + // vulnDBRefreshAge ago. On a stale or missing DB, it downloads a fresh copy. 
123 130 func loadVulnDatabase(ctx context.Context, vulnDBPath string) (vulnerability.Provider, error) { 124 131 vulnDBLock.RLock() 125 - if vulnDB != nil { 132 + if vulnDB != nil && time.Since(vulnDBLoaded) < vulnDBRefreshAge { 126 133 vulnDBLock.RUnlock() 127 134 return vulnDB, nil 128 135 } ··· 131 138 vulnDBLock.Lock() 132 139 defer vulnDBLock.Unlock() 133 140 134 - if vulnDB != nil { 141 + // Double-check after acquiring write lock 142 + if vulnDB != nil && time.Since(vulnDBLoaded) < vulnDBRefreshAge { 135 143 return vulnDB, nil 136 144 } 137 145 ··· 149 157 MaxAllowedBuiltAge: 5 * 24 * time.Hour, // 5 days 150 158 } 151 159 160 + // Try loading existing DB first (no network) 152 161 store, status, err := grype.LoadVulnerabilityDB(distConfig, installConfig, false) 153 162 if err != nil { 154 - return nil, fmt.Errorf("failed to load vulnerability database (status=%v): %w", status, err) 163 + slog.Warn("Vulnerability database load failed, attempting update", "error", err) 164 + 165 + // Download fresh DB 166 + if updateErr := updateVulnDatabase(vulnDBPath); updateErr != nil { 167 + return nil, fmt.Errorf("failed to update vulnerability database: %w (original: %w)", updateErr, err) 168 + } 169 + 170 + // Retry loading after update 171 + store, status, err = grype.LoadVulnerabilityDB(distConfig, installConfig, false) 172 + if err != nil { 173 + return nil, fmt.Errorf("failed to load vulnerability database after update (status=%v): %w", status, err) 174 + } 155 175 } 156 176 157 177 slog.Info("Vulnerability database loaded", ··· 159 179 "schemaVersion", status.SchemaVersion) 160 180 161 181 vulnDB = store 182 + vulnDBLoaded = time.Now() 162 183 return vulnDB, nil 163 184 } 164 185 165 - // initializeVulnDatabase downloads the vulnerability database on startup 186 + // initializeVulnDatabase ensures a fresh vulnerability database exists on startup. 
166 187 func initializeVulnDatabase(vulnDBPath, tmpDir string) error { 167 188 slog.Info("Initializing vulnerability database", "path", vulnDBPath) 168 - 169 - if err := os.MkdirAll(vulnDBPath, 0755); err != nil { 170 - return fmt.Errorf("failed to create database directory: %w", err) 171 - } 172 189 173 190 grpeTmpDir := filepath.Join(tmpDir, "grype-dl") 174 191 if err := os.MkdirAll(grpeTmpDir, 0755); err != nil { ··· 185 202 } 186 203 }() 187 204 205 + return updateVulnDatabase(vulnDBPath) 206 + } 207 + 208 + // updateVulnDatabase downloads a fresh vulnerability database if needed. 209 + // The curator internally checks whether an update is necessary (DB missing, 210 + // stale, or update-check cooldown expired) so this is safe to call often. 211 + func updateVulnDatabase(vulnDBPath string) error { 212 + if err := os.MkdirAll(vulnDBPath, 0755); err != nil { 213 + return fmt.Errorf("failed to create database directory: %w", err) 214 + } 215 + 188 216 distConfig := distribution.DefaultConfig() 189 217 installConfig := installation.Config{ 190 218 DBRootDir: vulnDBPath, ··· 203 231 return fmt.Errorf("failed to create database curator: %w", err) 204 232 } 205 233 206 - status := curator.Status() 207 - if !status.Built.IsZero() && status.Error == nil { 208 - slog.Info("Vulnerability database already exists", "built", status.Built) 209 - return nil 210 - } 211 - 212 - slog.Info("Downloading vulnerability database (this may take 5-10 minutes)...") 234 + slog.Info("Checking vulnerability database for updates...") 213 235 updated, err := curator.Update() 214 236 if err != nil { 215 - return fmt.Errorf("failed to download vulnerability database: %w", err) 237 + return fmt.Errorf("failed to update vulnerability database: %w", err) 216 238 } 217 239 218 240 if updated { 219 - slog.Info("Vulnerability database downloaded successfully") 241 + slog.Info("Vulnerability database updated successfully") 220 242 } else { 221 243 slog.Info("Vulnerability database is up to date") 222 
244 }
+17 -1
scanner/internal/scan/worker.go
··· 7 7 "fmt" 8 8 "log/slog" 9 9 "os" 10 + "strings" 10 11 "sync" 11 12 "time" 12 13 ··· 83 84 84 85 result, err := wp.processJob(ctx, job) 85 86 if err != nil { 86 - slog.Error("Scan job failed", 87 + logLevel := slog.LevelError 88 + if strings.HasPrefix(err.Error(), "skipped:") { 89 + logLevel = slog.LevelInfo 90 + } 91 + slog.Log(ctx, logLevel, "Scan job failed", 87 92 "worker_id", id, 88 93 "repository", job.Repository, 89 94 "error", err) ··· 100 105 } 101 106 } 102 107 108 + // unscannable config media types — these are OCI artifacts that aren't 109 + // container images so Syft/Grype can't analyze their layers. 110 + var unscannableConfigTypes = map[string]bool{ 111 + "application/vnd.cncf.helm.config.v1+json": true, // Helm charts 112 + } 113 + 103 114 func (wp *WorkerPool) processJob(ctx context.Context, job *scanner.ScanJob) (*scanner.ScanResult, error) { 104 115 startTime := time.Now() 116 + 117 + // Skip non-container OCI artifacts (Helm charts, WASM modules, etc.) 118 + if unscannableConfigTypes[job.Config.MediaType] { 119 + return nil, fmt.Errorf("skipped: unscannable artifact type %s", job.Config.MediaType) 120 + } 105 121 106 122 // Ensure tmp dir exists 107 123 if err := ensureDir(wp.cfg.Vuln.TmpDir); err != nil {