A container registry that uses the AT Protocol for manifest storage and S3 for blob storage.

clean up logs, delete cached data when atproto account is deleted

evan.jarrett.net 1df1bb57 f19dfa27

verified
+236 -76
+1 -5
cmd/appview/serve.go
··· 165 165 go func() { 166 166 // Wait for services to be ready (Docker startup race condition) 167 167 time.Sleep(10 * time.Second) 168 - // Create service token getter callback that uses auth.GetOrFetchServiceToken 169 - getServiceToken := func(ctx context.Context, userDID, holdDID, pdsEndpoint string) (string, error) { 170 - return auth.GetOrFetchServiceToken(ctx, refresher, userDID, holdDID, pdsEndpoint) 171 - } 172 - if err := db.MigrateStatsToHolds(context.Background(), uiDatabase, getServiceToken); err != nil { 168 + if err := db.MigrateStatsToHolds(context.Background(), uiDatabase); err != nil { 173 169 slog.Warn("Stats migration failed", "error", err) 174 170 } 175 171 }()
+21
pkg/appview/db/queries.go
// DeleteUserData deletes all cached data for the user identified by did.
// It is used when an account is permanently deleted or when we discover
// the account no longer exists (e.g., RepoNotFound during backfill).
//
// Only the users row is deleted explicitly. Due to ON DELETE CASCADE in the
// schema, that delete cascades to the dependent tables: manifests, tags,
// layers, references, annotations, stars, repo_pages, etc.
// NOTE(review): the cascade only fires if foreign key enforcement is enabled
// on the connection (SQLite leaves it off by default) — confirm InitDB does so.
//
// The call is idempotent: deleting a DID that has no users row matches zero
// rows and returns nil. A non-nil error is returned only when the DELETE
// statement itself fails, wrapped so callers can still inspect the cause
// with errors.Is / errors.As.
func DeleteUserData(db *sql.DB, did string) error {
	// The number of affected rows is deliberately not inspected: "user was
	// removed" and "user was already gone" are both successful outcomes.
	if _, err := db.Exec(`DELETE FROM users WHERE did = ?`, did); err != nil {
		return fmt.Errorf("failed to delete user: %w", err)
	}
	return nil
}
+108
pkg/appview/db/queries_test.go
··· 1199 1199 }) 1200 1200 } 1201 1201 } 1202 + 1203 + func TestDeleteUserData(t *testing.T) { 1204 + db, err := InitDB(":memory:", true) 1205 + if err != nil { 1206 + t.Fatalf("Failed to init database: %v", err) 1207 + } 1208 + defer db.Close() 1209 + 1210 + // Create test user with related data 1211 + testUser := &User{ 1212 + DID: "did:plc:deleteme", 1213 + Handle: "deleteme.bsky.social", 1214 + PDSEndpoint: "https://test.pds.example.com", 1215 + LastSeen: time.Now(), 1216 + } 1217 + if err := UpsertUser(db, testUser); err != nil { 1218 + t.Fatalf("Failed to insert user: %v", err) 1219 + } 1220 + 1221 + // Add manifest 1222 + manifest := &Manifest{ 1223 + DID: testUser.DID, 1224 + Repository: "myapp", 1225 + Digest: "sha256:abc123", 1226 + HoldEndpoint: "did:web:hold.example.com", 1227 + SchemaVersion: 2, 1228 + MediaType: "application/vnd.oci.image.manifest.v1+json", 1229 + CreatedAt: time.Now(), 1230 + } 1231 + manifestID, err := InsertManifest(db, manifest) 1232 + if err != nil { 1233 + t.Fatalf("Failed to insert manifest: %v", err) 1234 + } 1235 + 1236 + // Add layer 1237 + layer := &Layer{ 1238 + ManifestID: manifestID, 1239 + LayerIndex: 0, 1240 + Digest: "sha256:layer1", 1241 + Size: 1000, 1242 + MediaType: "application/vnd.oci.image.layer.v1.tar+gzip", 1243 + } 1244 + if err := InsertLayer(db, layer); err != nil { 1245 + t.Fatalf("Failed to insert layer: %v", err) 1246 + } 1247 + 1248 + // Add tag 1249 + tag := &Tag{ 1250 + DID: testUser.DID, 1251 + Repository: "myapp", 1252 + Tag: "latest", 1253 + Digest: "sha256:abc123", 1254 + CreatedAt: time.Now(), 1255 + } 1256 + if err := UpsertTag(db, tag); err != nil { 1257 + t.Fatalf("Failed to insert tag: %v", err) 1258 + } 1259 + 1260 + // Add annotations 1261 + if err := UpsertRepositoryAnnotations(db, testUser.DID, "myapp", map[string]string{ 1262 + "org.opencontainers.image.title": "My App", 1263 + }); err != nil { 1264 + t.Fatalf("Failed to insert annotations: %v", err) 1265 + } 1266 + 1267 + // Verify 
data exists 1268 + var count int 1269 + db.QueryRow(`SELECT COUNT(*) FROM manifests WHERE did = ?`, testUser.DID).Scan(&count) 1270 + if count != 1 { 1271 + t.Fatalf("Expected 1 manifest, got %d", count) 1272 + } 1273 + db.QueryRow(`SELECT COUNT(*) FROM tags WHERE did = ?`, testUser.DID).Scan(&count) 1274 + if count != 1 { 1275 + t.Fatalf("Expected 1 tag, got %d", count) 1276 + } 1277 + db.QueryRow(`SELECT COUNT(*) FROM layers WHERE manifest_id = ?`, manifestID).Scan(&count) 1278 + if count != 1 { 1279 + t.Fatalf("Expected 1 layer, got %d", count) 1280 + } 1281 + 1282 + // Delete user data 1283 + if err := DeleteUserData(db, testUser.DID); err != nil { 1284 + t.Fatalf("Failed to delete user data: %v", err) 1285 + } 1286 + 1287 + // Verify all data was cascade deleted 1288 + db.QueryRow(`SELECT COUNT(*) FROM users WHERE did = ?`, testUser.DID).Scan(&count) 1289 + if count != 0 { 1290 + t.Errorf("Expected 0 users, got %d", count) 1291 + } 1292 + db.QueryRow(`SELECT COUNT(*) FROM manifests WHERE did = ?`, testUser.DID).Scan(&count) 1293 + if count != 0 { 1294 + t.Errorf("Expected 0 manifests after cascade delete, got %d", count) 1295 + } 1296 + db.QueryRow(`SELECT COUNT(*) FROM tags WHERE did = ?`, testUser.DID).Scan(&count) 1297 + if count != 0 { 1298 + t.Errorf("Expected 0 tags after cascade delete, got %d", count) 1299 + } 1300 + db.QueryRow(`SELECT COUNT(*) FROM layers WHERE manifest_id = ?`, manifestID).Scan(&count) 1301 + if count != 0 { 1302 + t.Errorf("Expected 0 layers after cascade delete, got %d", count) 1303 + } 1304 + 1305 + // Test idempotency - deleting non-existent user should not error 1306 + if err := DeleteUserData(db, testUser.DID); err != nil { 1307 + t.Errorf("Deleting non-existent user should not error, got: %v", err) 1308 + } 1309 + }
+6 -30
pkg/appview/db/stats_migration.go
··· 14 14 "atcr.io/pkg/atproto" 15 15 ) 16 16 17 - // ServiceTokenGetter is a function type for getting service tokens. 18 - // This avoids importing auth from db (which would create import cycles with tests). 19 - type ServiceTokenGetter func(ctx context.Context, userDID, holdDID, pdsEndpoint string) (string, error) 20 - 21 17 // MigrateStatsToHolds migrates existing repository_stats data to hold services. 22 18 // This is a one-time migration that runs on startup. 23 19 // ··· 25 21 // 1. Checks if migration has already completed 26 22 // 2. Reads all repository_stats entries 27 23 // 3. For each entry, looks up the hold DID from manifests table 28 - // 4. Gets a service token for the user and calls the hold's setStats endpoint 24 + // 4. Calls the hold's setStats endpoint (no auth required - temporary migration endpoint) 29 25 // 5. Marks migration complete after all entries are processed 30 26 // 31 27 // If a hold is offline, the migration logs a warning and continues. 32 28 // The hold will receive real-time stats updates via Jetstream once online. 33 - // 34 - // The getServiceToken parameter is a callback to avoid import cycles with pkg/auth. 
35 - func MigrateStatsToHolds(ctx context.Context, db *sql.DB, getServiceToken ServiceTokenGetter) error { 29 + func MigrateStatsToHolds(ctx context.Context, db *sql.DB) error { 36 30 // Check if migration already done 37 31 var migrationDone bool 38 32 err := db.QueryRowContext(ctx, ` ··· 121 115 continue 122 116 } 123 117 124 - // Get user's PDS endpoint 125 - user, err := GetUserByDID(db, stat.DID) 126 - if err != nil || user == nil { 127 - slog.Debug("User not found in database, skipping", "component", "migration", 128 - "did", stat.DID, "repository", stat.Repository) 129 - skipCount++ 130 - continue 131 - } 132 - 133 - // Get service token for the user 134 - serviceToken, err := getServiceToken(ctx, stat.DID, holdDID, user.PDSEndpoint) 135 - if err != nil { 136 - slog.Warn("Failed to get service token, skipping", "component", "migration", 137 - "did", stat.DID, "repository", stat.Repository, "error", err) 138 - errorCount++ 139 - continue 140 - } 141 - 142 118 // Resolve hold DID to HTTP URL 143 119 holdURL := atproto.ResolveHoldURL(holdDID) 144 120 if holdURL == "" { ··· 148 124 continue 149 125 } 150 126 151 - // Call hold's setStats endpoint 152 - err = callSetStats(ctx, holdURL, serviceToken, stat.DID, stat.Repository, 127 + // Call hold's setStats endpoint (no auth required for migration) 128 + err = callSetStats(ctx, holdURL, stat.DID, stat.Repository, 153 129 stat.PullCount, stat.PushCount, stat.LastPull.String, stat.LastPush.String) 154 130 if err != nil { 155 131 slog.Warn("Failed to migrate stats to hold, continuing", "component", "migration", ··· 185 161 } 186 162 187 163 // callSetStats calls the hold's io.atcr.hold.setStats endpoint 188 - func callSetStats(ctx context.Context, holdURL, serviceToken, ownerDID, repository string, pullCount, pushCount int64, lastPull, lastPush string) error { 164 + // No authentication required - this is a temporary migration endpoint 165 + func callSetStats(ctx context.Context, holdURL, ownerDID, repository string, 
pullCount, pushCount int64, lastPull, lastPush string) error { 189 166 // Build request 190 167 reqBody := map[string]any{ 191 168 "ownerDid": ownerDID, ··· 212 189 } 213 190 214 191 req.Header.Set("Content-Type", "application/json") 215 - req.Header.Set("Authorization", "Bearer "+serviceToken) 216 192 217 193 // Send request with timeout 218 194 client := &http.Client{Timeout: 10 * time.Second}
+17 -1
pkg/appview/jetstream/backfill.go
··· 4 4 "context" 5 5 "database/sql" 6 6 "encoding/json" 7 + "errors" 7 8 "fmt" 8 9 "io" 9 10 "log/slog" ··· 111 112 for _, repo := range result.Repos { 112 113 recordCount, err := b.backfillRepo(ctx, repo.DID, collection) 113 114 if err != nil { 114 - slog.Warn("Backfill failed to backfill repo", "did", repo.DID, "error", err) 115 + // RepoNotFound means account was deleted/deactivated 116 + // Clean up our cached data since the source is gone 117 + if strings.Contains(err.Error(), "RepoNotFound") { 118 + if delErr := db.DeleteUserData(b.db, repo.DID); delErr != nil { 119 + slog.Warn("Backfill failed to delete data for removed repo", "did", repo.DID, "error", delErr) 120 + } else { 121 + slog.Info("Backfill cleaned up data for deleted/deactivated repo", "did", repo.DID) 122 + } 123 + } else { 124 + slog.Warn("Backfill failed to backfill repo", "did", repo.DID, "error", err) 125 + } 115 126 continue 116 127 } 117 128 ··· 582 593 existingRecord, err := pdsClient.GetRecord(ctx, atproto.RepoPageCollection, repository) 583 594 var createdAt time.Time 584 595 var avatarRef *atproto.ATProtoBlobRef 596 + 597 + if err != nil && !errors.Is(err, atproto.ErrRecordNotFound) { 598 + // Non-404 error (e.g., no OAuth session) - fail fast instead of trying PutRecord 599 + return fmt.Errorf("failed to check existing record: %w", err) 600 + } 585 601 586 602 if err == nil && existingRecord != nil { 587 603 // Parse existing record
+56 -25
pkg/appview/jetstream/processor.go
··· 426 426 }) 427 427 } 428 428 429 - // ProcessAccount handles account status events (deactivation/reactivation) 429 + // ProcessAccount handles account status events (deactivation/deletion/etc) 430 430 // This is called when Jetstream receives an account event indicating status changes. 431 431 // 432 - // IMPORTANT: Deactivation events are ambiguous - they could indicate: 433 - // 1. Permanent account deactivation (user deleted account) 434 - // 2. PDS migration (account deactivated at old PDS, reactivated at new PDS) 432 + // Status handling: 433 + // - "deleted": Account permanently deleted - remove all cached data 434 + // - "deactivated": Could be PDS migration or permanent - invalidate cache only 435 + // - "takendown": Moderation action - invalidate cache only 436 + // - Other: Ignore 435 437 // 436 - // We DO NOT delete user data on deactivation events. Instead, we invalidate the 437 - // identity cache. On the next resolution attempt: 438 - // - If migrated: Resolution finds the new PDS and updates the database automatically 439 - // - If truly deactivated: Resolution fails and user won't appear in new queries 440 - // 441 - // This approach prevents data loss from PDS migrations while still handling deactivations. 438 + // For "deactivated", we don't delete data because it's ambiguous: 439 + // - Could be permanent deactivation (user deleted account) 440 + // - Could be PDS migration (account moves to new PDS) 441 + // Cache invalidation forces re-resolution on next lookup. 442 442 // 443 443 // Only processes events for users who already exist in our database (have ATCR activity). 
444 444 func (p *Processor) ProcessAccount(ctx context.Context, did string, active bool, status string) error { 445 - // Only process deactivation events 446 - if active || status != "deactivated" { 445 + // Skip active accounts or unknown statuses 446 + if active { 447 447 return nil 448 448 } 449 449 450 - // Check if user exists in our database - only update if they're an ATCR user 450 + // Check if user exists in our database - only process if they're an ATCR user 451 451 user, err := db.GetUserByDID(p.db, did) 452 452 if err != nil { 453 453 return fmt.Errorf("failed to check user existence: %w", err) ··· 458 458 return nil 459 459 } 460 460 461 - // Invalidate cached identity data to force re-resolution on next lookup 462 - // This will discover if the account was migrated (new PDS) or truly deactivated (resolution fails) 463 - if err := atproto.InvalidateIdentity(ctx, did); err != nil { 464 - slog.Warn("Failed to invalidate identity cache for deactivated account", 461 + switch status { 462 + case "deleted": 463 + // Account permanently deleted - remove all cached data 464 + if err := db.DeleteUserData(p.db, did); err != nil { 465 + slog.Error("Failed to delete user data for deleted account", 466 + "component", "processor", 467 + "did", did, 468 + "handle", user.Handle, 469 + "error", err) 470 + return err 471 + } 472 + 473 + // Also invalidate identity cache 474 + _ = atproto.InvalidateIdentity(ctx, did) 475 + 476 + slog.Info("Deleted user data for deleted account", 465 477 "component", "processor", 466 478 "did", did, 467 - "error", err) 468 - return err 469 - } 479 + "handle", user.Handle) 470 480 471 - slog.Info("Processed account deactivation event - cache invalidated", 472 - "component", "processor", 473 - "did", did, 474 - "handle", user.Handle, 475 - "status", status) 481 + case "deactivated", "takendown": 482 + // Ambiguous status - invalidate cache but keep data 483 + // For deactivated: could be PDS migration, will resolve on next lookup 484 + // 
For takendown: moderation action, keep data in case of appeal 485 + if err := atproto.InvalidateIdentity(ctx, did); err != nil { 486 + slog.Warn("Failed to invalidate identity cache", 487 + "component", "processor", 488 + "did", did, 489 + "status", status, 490 + "error", err) 491 + return err 492 + } 493 + 494 + slog.Info("Processed account status event - cache invalidated", 495 + "component", "processor", 496 + "did", did, 497 + "handle", user.Handle, 498 + "status", status) 499 + 500 + default: 501 + // Unknown status - ignore 502 + slog.Debug("Ignoring unknown account status", 503 + "component", "processor", 504 + "did", did, 505 + "status", status) 506 + } 476 507 477 508 return nil 478 509 }
+23
pkg/appview/jetstream/processor_test.go
··· 691 691 if !exists { 692 692 t.Error("User should still exist after multiple deactivation events") 693 693 } 694 + 695 + // Test 6: Process account deletion - should delete user data 696 + err = processor.ProcessAccount(context.Background(), testDID, false, "deleted") 697 + if err != nil { 698 + t.Logf("Cache invalidation error during deletion (expected): %v", err) 699 + } 700 + 701 + // User should be deleted after "deleted" status 702 + err = db.QueryRow(` 703 + SELECT EXISTS(SELECT 1 FROM users WHERE did = ?) 704 + `, testDID).Scan(&exists) 705 + if err != nil { 706 + t.Fatalf("Failed to check if user exists after deletion: %v", err) 707 + } 708 + if exists { 709 + t.Error("User should NOT exist after deletion event") 710 + } 711 + 712 + // Test 7: Process deletion for already-deleted user (idempotent) 713 + err = processor.ProcessAccount(context.Background(), testDID, false, "deleted") 714 + if err != nil { 715 + t.Errorf("Deletion of non-existent user should not error, got: %v", err) 716 + } 694 717 }
+1 -1
pkg/appview/jetstream/worker.go
··· 203 203 return ctx.Err() 204 204 case <-heartbeatTicker.C: 205 205 elapsed := time.Since(lastHeartbeat) 206 - slog.Info("Jetstream alive", "events_processed", eventCount, "elapsed_seconds", elapsed.Seconds()) 206 + slog.Debug("Jetstream alive", "events_processed", eventCount, "elapsed_seconds", elapsed.Seconds()) 207 207 eventCount = 0 208 208 lastHeartbeat = time.Now() 209 209 default:
+3 -14
pkg/hold/oci/xrpc.go
··· 381 381 } 382 382 383 383 // HandleSetStats sets absolute stats values for a repository (used by migration) 384 - // This is a migration-only endpoint that allows AppView to sync existing stats to holds 384 + // This is a temporary migration-only endpoint that allows AppView to sync existing stats to holds. 385 + // No authentication required - this endpoint will be removed after migration is complete. 386 + // TODO: Remove this endpoint after stats migration is complete 385 387 func (h *XRPCHandler) HandleSetStats(w http.ResponseWriter, r *http.Request) { 386 388 ctx := r.Context() 387 389 388 - // Validate service token (same auth as blob:write endpoints) 389 - validatedUser, err := pds.ValidateBlobWriteAccess(r, h.pds, h.httpClient) 390 - if err != nil { 391 - RespondError(w, http.StatusForbidden, fmt.Sprintf("authorization failed: %v", err)) 392 - return 393 - } 394 - 395 390 // Parse request 396 391 var req struct { 397 392 OwnerDID string `json:"ownerDid"` ··· 404 399 405 400 if err := DecodeJSON(r, &req); err != nil { 406 401 RespondError(w, http.StatusBadRequest, err.Error()) 407 - return 408 - } 409 - 410 - // Verify user DID matches token (user can only set stats for their own repos) 411 - if req.OwnerDID != validatedUser.DID { 412 - RespondError(w, http.StatusForbidden, "owner DID mismatch") 413 402 return 414 403 } 415 404