···717717 return err
718718}
// DeleteUserData deletes all cached data for a user.
// This is used when an account is permanently deleted or when we discover
// the account no longer exists (e.g., RepoNotFound during backfill).
//
// Only the users row is deleted explicitly. Due to ON DELETE CASCADE in the
// schema, deleting from users will automatically cascade to: manifests, tags,
// layers, references, annotations, stars, repo_pages, etc.
// NOTE(review): if the backing store is SQLite, cascades only fire when
// foreign-key enforcement is enabled on the connection — confirm InitDB does so.
//
// The operation is idempotent: deleting a DID that has no users row is not an
// error. A non-nil error is returned only when the DELETE statement itself
// fails, wrapped so callers can still inspect the underlying cause.
func DeleteUserData(db *sql.DB, did string) error {
	// The affected-row count is intentionally not inspected: zero rows just
	// means the user was already gone, which is treated as success.
	if _, err := db.Exec(`DELETE FROM users WHERE did = ?`, did); err != nil {
		return fmt.Errorf("failed to delete user: %w", err)
	}
	return nil
}
740740+720741// GetManifest fetches a single manifest by digest
721742// Note: Annotations are stored separately in repository_annotations table
722743func GetManifest(db *sql.DB, digest string) (*Manifest, error) {
+108
pkg/appview/db/queries_test.go
···11991199 })
12001200 }
12011201}
12021202+12031203+func TestDeleteUserData(t *testing.T) {
12041204+ db, err := InitDB(":memory:", true)
12051205+ if err != nil {
12061206+ t.Fatalf("Failed to init database: %v", err)
12071207+ }
12081208+ defer db.Close()
12091209+12101210+ // Create test user with related data
12111211+ testUser := &User{
12121212+ DID: "did:plc:deleteme",
12131213+ Handle: "deleteme.bsky.social",
12141214+ PDSEndpoint: "https://test.pds.example.com",
12151215+ LastSeen: time.Now(),
12161216+ }
12171217+ if err := UpsertUser(db, testUser); err != nil {
12181218+ t.Fatalf("Failed to insert user: %v", err)
12191219+ }
12201220+12211221+ // Add manifest
12221222+ manifest := &Manifest{
12231223+ DID: testUser.DID,
12241224+ Repository: "myapp",
12251225+ Digest: "sha256:abc123",
12261226+ HoldEndpoint: "did:web:hold.example.com",
12271227+ SchemaVersion: 2,
12281228+ MediaType: "application/vnd.oci.image.manifest.v1+json",
12291229+ CreatedAt: time.Now(),
12301230+ }
12311231+ manifestID, err := InsertManifest(db, manifest)
12321232+ if err != nil {
12331233+ t.Fatalf("Failed to insert manifest: %v", err)
12341234+ }
12351235+12361236+ // Add layer
12371237+ layer := &Layer{
12381238+ ManifestID: manifestID,
12391239+ LayerIndex: 0,
12401240+ Digest: "sha256:layer1",
12411241+ Size: 1000,
12421242+ MediaType: "application/vnd.oci.image.layer.v1.tar+gzip",
12431243+ }
12441244+ if err := InsertLayer(db, layer); err != nil {
12451245+ t.Fatalf("Failed to insert layer: %v", err)
12461246+ }
12471247+12481248+ // Add tag
12491249+ tag := &Tag{
12501250+ DID: testUser.DID,
12511251+ Repository: "myapp",
12521252+ Tag: "latest",
12531253+ Digest: "sha256:abc123",
12541254+ CreatedAt: time.Now(),
12551255+ }
12561256+ if err := UpsertTag(db, tag); err != nil {
12571257+ t.Fatalf("Failed to insert tag: %v", err)
12581258+ }
12591259+12601260+ // Add annotations
12611261+ if err := UpsertRepositoryAnnotations(db, testUser.DID, "myapp", map[string]string{
12621262+ "org.opencontainers.image.title": "My App",
12631263+ }); err != nil {
12641264+ t.Fatalf("Failed to insert annotations: %v", err)
12651265+ }
12661266+12671267+ // Verify data exists
12681268+ var count int
12691269+ db.QueryRow(`SELECT COUNT(*) FROM manifests WHERE did = ?`, testUser.DID).Scan(&count)
12701270+ if count != 1 {
12711271+ t.Fatalf("Expected 1 manifest, got %d", count)
12721272+ }
12731273+ db.QueryRow(`SELECT COUNT(*) FROM tags WHERE did = ?`, testUser.DID).Scan(&count)
12741274+ if count != 1 {
12751275+ t.Fatalf("Expected 1 tag, got %d", count)
12761276+ }
12771277+ db.QueryRow(`SELECT COUNT(*) FROM layers WHERE manifest_id = ?`, manifestID).Scan(&count)
12781278+ if count != 1 {
12791279+ t.Fatalf("Expected 1 layer, got %d", count)
12801280+ }
12811281+12821282+ // Delete user data
12831283+ if err := DeleteUserData(db, testUser.DID); err != nil {
12841284+ t.Fatalf("Failed to delete user data: %v", err)
12851285+ }
12861286+12871287+ // Verify all data was cascade deleted
12881288+ db.QueryRow(`SELECT COUNT(*) FROM users WHERE did = ?`, testUser.DID).Scan(&count)
12891289+ if count != 0 {
12901290+ t.Errorf("Expected 0 users, got %d", count)
12911291+ }
12921292+ db.QueryRow(`SELECT COUNT(*) FROM manifests WHERE did = ?`, testUser.DID).Scan(&count)
12931293+ if count != 0 {
12941294+ t.Errorf("Expected 0 manifests after cascade delete, got %d", count)
12951295+ }
12961296+ db.QueryRow(`SELECT COUNT(*) FROM tags WHERE did = ?`, testUser.DID).Scan(&count)
12971297+ if count != 0 {
12981298+ t.Errorf("Expected 0 tags after cascade delete, got %d", count)
12991299+ }
13001300+ db.QueryRow(`SELECT COUNT(*) FROM layers WHERE manifest_id = ?`, manifestID).Scan(&count)
13011301+ if count != 0 {
13021302+ t.Errorf("Expected 0 layers after cascade delete, got %d", count)
13031303+ }
13041304+13051305+ // Test idempotency - deleting non-existent user should not error
13061306+ if err := DeleteUserData(db, testUser.DID); err != nil {
13071307+ t.Errorf("Deleting non-existent user should not error, got: %v", err)
13081308+ }
13091309+}
+6-30
pkg/appview/db/stats_migration.go
···1414 "atcr.io/pkg/atproto"
1515)
16161717-// ServiceTokenGetter is a function type for getting service tokens.
1818-// This avoids importing auth from db (which would create import cycles with tests).
1919-type ServiceTokenGetter func(ctx context.Context, userDID, holdDID, pdsEndpoint string) (string, error)
2020-2117// MigrateStatsToHolds migrates existing repository_stats data to hold services.
2218// This is a one-time migration that runs on startup.
2319//
···2521// 1. Checks if migration has already completed
2622// 2. Reads all repository_stats entries
2723// 3. For each entry, looks up the hold DID from manifests table
2828-// 4. Gets a service token for the user and calls the hold's setStats endpoint
2424+// 4. Calls the hold's setStats endpoint (no auth required - temporary migration endpoint)
2925// 5. Marks migration complete after all entries are processed
3026//
3127// If a hold is offline, the migration logs a warning and continues.
3228// The hold will receive real-time stats updates via Jetstream once online.
3333-//
3434-// The getServiceToken parameter is a callback to avoid import cycles with pkg/auth.
3535-func MigrateStatsToHolds(ctx context.Context, db *sql.DB, getServiceToken ServiceTokenGetter) error {
2929+func MigrateStatsToHolds(ctx context.Context, db *sql.DB) error {
3630 // Check if migration already done
3731 var migrationDone bool
3832 err := db.QueryRowContext(ctx, `
···121115 continue
122116 }
123117124124- // Get user's PDS endpoint
125125- user, err := GetUserByDID(db, stat.DID)
126126- if err != nil || user == nil {
127127- slog.Debug("User not found in database, skipping", "component", "migration",
128128- "did", stat.DID, "repository", stat.Repository)
129129- skipCount++
130130- continue
131131- }
132132-133133- // Get service token for the user
134134- serviceToken, err := getServiceToken(ctx, stat.DID, holdDID, user.PDSEndpoint)
135135- if err != nil {
136136- slog.Warn("Failed to get service token, skipping", "component", "migration",
137137- "did", stat.DID, "repository", stat.Repository, "error", err)
138138- errorCount++
139139- continue
140140- }
141141-142118 // Resolve hold DID to HTTP URL
143119 holdURL := atproto.ResolveHoldURL(holdDID)
144120 if holdURL == "" {
···148124 continue
149125 }
150126151151- // Call hold's setStats endpoint
152152- err = callSetStats(ctx, holdURL, serviceToken, stat.DID, stat.Repository,
127127+ // Call hold's setStats endpoint (no auth required for migration)
128128+ err = callSetStats(ctx, holdURL, stat.DID, stat.Repository,
153129 stat.PullCount, stat.PushCount, stat.LastPull.String, stat.LastPush.String)
154130 if err != nil {
155131 slog.Warn("Failed to migrate stats to hold, continuing", "component", "migration",
···185161}
186162187163// callSetStats calls the hold's io.atcr.hold.setStats endpoint
188188-func callSetStats(ctx context.Context, holdURL, serviceToken, ownerDID, repository string, pullCount, pushCount int64, lastPull, lastPush string) error {
164164+// No authentication required - this is a temporary migration endpoint
165165+func callSetStats(ctx context.Context, holdURL, ownerDID, repository string, pullCount, pushCount int64, lastPull, lastPush string) error {
189166 // Build request
190167 reqBody := map[string]any{
191168 "ownerDid": ownerDID,
···212189 }
213190214191 req.Header.Set("Content-Type", "application/json")
215215- req.Header.Set("Authorization", "Bearer "+serviceToken)
216192217193 // Send request with timeout
218194 client := &http.Client{Timeout: 10 * time.Second}
+17-1
pkg/appview/jetstream/backfill.go
···44 "context"
55 "database/sql"
66 "encoding/json"
77+ "errors"
78 "fmt"
89 "io"
910 "log/slog"
···111112 for _, repo := range result.Repos {
112113 recordCount, err := b.backfillRepo(ctx, repo.DID, collection)
113114 if err != nil {
114114- slog.Warn("Backfill failed to backfill repo", "did", repo.DID, "error", err)
115115+ // RepoNotFound means account was deleted/deactivated
116116+ // Clean up our cached data since the source is gone
117117+ if strings.Contains(err.Error(), "RepoNotFound") {
118118+ if delErr := db.DeleteUserData(b.db, repo.DID); delErr != nil {
119119+ slog.Warn("Backfill failed to delete data for removed repo", "did", repo.DID, "error", delErr)
120120+ } else {
121121+ slog.Info("Backfill cleaned up data for deleted/deactivated repo", "did", repo.DID)
122122+ }
123123+ } else {
124124+ slog.Warn("Backfill failed to backfill repo", "did", repo.DID, "error", err)
125125+ }
115126 continue
116127 }
117128···582593 existingRecord, err := pdsClient.GetRecord(ctx, atproto.RepoPageCollection, repository)
583594 var createdAt time.Time
584595 var avatarRef *atproto.ATProtoBlobRef
596596+597597+ if err != nil && !errors.Is(err, atproto.ErrRecordNotFound) {
598598+ // Non-404 error (e.g., no OAuth session) - fail fast instead of trying PutRecord
599599+ return fmt.Errorf("failed to check existing record: %w", err)
600600+ }
585601586602 if err == nil && existingRecord != nil {
587603 // Parse existing record
+56-25
pkg/appview/jetstream/processor.go
···426426 })
427427}
428428429429-// ProcessAccount handles account status events (deactivation/reactivation)
429429+// ProcessAccount handles account status events (deactivation/deletion/etc)
430430// This is called when Jetstream receives an account event indicating status changes.
431431//
432432-// IMPORTANT: Deactivation events are ambiguous - they could indicate:
433433-// 1. Permanent account deactivation (user deleted account)
434434-// 2. PDS migration (account deactivated at old PDS, reactivated at new PDS)
432432+// Status handling:
433433+// - "deleted": Account permanently deleted - remove all cached data
434434+// - "deactivated": Could be PDS migration or permanent - invalidate cache only
435435+// - "takendown": Moderation action - invalidate cache only
436436+// - Other: Ignore
435437//
436436-// We DO NOT delete user data on deactivation events. Instead, we invalidate the
437437-// identity cache. On the next resolution attempt:
438438-// - If migrated: Resolution finds the new PDS and updates the database automatically
439439-// - If truly deactivated: Resolution fails and user won't appear in new queries
440440-//
441441-// This approach prevents data loss from PDS migrations while still handling deactivations.
438438+// For "deactivated", we don't delete data because it's ambiguous:
439439+// - Could be permanent deactivation (user deleted account)
440440+// - Could be PDS migration (account moves to new PDS)
441441+// Cache invalidation forces re-resolution on next lookup.
442442//
443443// Only processes events for users who already exist in our database (have ATCR activity).
444444func (p *Processor) ProcessAccount(ctx context.Context, did string, active bool, status string) error {
445445- // Only process deactivation events
446446- if active || status != "deactivated" {
445445+ // Skip active accounts; other statuses are dispatched by the switch below
446446+ if active {
447447 return nil
448448 }
449449450450- // Check if user exists in our database - only update if they're an ATCR user
450450+ // Check if user exists in our database - only process if they're an ATCR user
451451 user, err := db.GetUserByDID(p.db, did)
452452 if err != nil {
453453 return fmt.Errorf("failed to check user existence: %w", err)
···458458 return nil
459459 }
460460461461- // Invalidate cached identity data to force re-resolution on next lookup
462462- // This will discover if the account was migrated (new PDS) or truly deactivated (resolution fails)
463463- if err := atproto.InvalidateIdentity(ctx, did); err != nil {
464464- slog.Warn("Failed to invalidate identity cache for deactivated account",
461461+ switch status {
462462+ case "deleted":
463463+ // Account permanently deleted - remove all cached data
464464+ if err := db.DeleteUserData(p.db, did); err != nil {
465465+ slog.Error("Failed to delete user data for deleted account",
466466+ "component", "processor",
467467+ "did", did,
468468+ "handle", user.Handle,
469469+ "error", err)
470470+ return err
471471+ }
472472+473473+ // Also invalidate identity cache
474474+ _ = atproto.InvalidateIdentity(ctx, did)
475475+476476+ slog.Info("Deleted user data for deleted account",
465477 "component", "processor",
466478 "did", did,
467467- "error", err)
468468- return err
469469- }
479479+ "handle", user.Handle)
470480471471- slog.Info("Processed account deactivation event - cache invalidated",
472472- "component", "processor",
473473- "did", did,
474474- "handle", user.Handle,
475475- "status", status)
481481+ case "deactivated", "takendown":
482482+ // Ambiguous status - invalidate cache but keep data
483483+ // For deactivated: could be PDS migration, will resolve on next lookup
484484+ // For takendown: moderation action, keep data in case of appeal
485485+ if err := atproto.InvalidateIdentity(ctx, did); err != nil {
486486+ slog.Warn("Failed to invalidate identity cache",
487487+ "component", "processor",
488488+ "did", did,
489489+ "status", status,
490490+ "error", err)
491491+ return err
492492+ }
493493+494494+ slog.Info("Processed account status event - cache invalidated",
495495+ "component", "processor",
496496+ "did", did,
497497+ "handle", user.Handle,
498498+ "status", status)
499499+500500+ default:
501501+ // Unknown status - ignore
502502+ slog.Debug("Ignoring unknown account status",
503503+ "component", "processor",
504504+ "did", did,
505505+ "status", status)
506506+ }
476507477508 return nil
478509}
+23
pkg/appview/jetstream/processor_test.go
···691691 if !exists {
692692 t.Error("User should still exist after multiple deactivation events")
693693 }
694694+695695+ // Test 6: Process account deletion - should delete user data
696696+ err = processor.ProcessAccount(context.Background(), testDID, false, "deleted")
697697+ if err != nil {
698698+ t.Logf("Cache invalidation error during deletion (expected): %v", err)
699699+ }
700700+701701+ // User should be deleted after "deleted" status
702702+ err = db.QueryRow(`
703703+ SELECT EXISTS(SELECT 1 FROM users WHERE did = ?)
704704+ `, testDID).Scan(&exists)
705705+ if err != nil {
706706+ t.Fatalf("Failed to check if user exists after deletion: %v", err)
707707+ }
708708+ if exists {
709709+ t.Error("User should NOT exist after deletion event")
710710+ }
711711+712712+ // Test 7: Process deletion for already-deleted user (idempotent)
713713+ err = processor.ProcessAccount(context.Background(), testDID, false, "deleted")
714714+ if err != nil {
715715+ t.Errorf("Deletion of non-existent user should not error, got: %v", err)
716716+ }
694717}
···381381}
382382383383// HandleSetStats sets absolute stats values for a repository (used by migration)
384384-// This is a migration-only endpoint that allows AppView to sync existing stats to holds
384384+// This is a temporary migration-only endpoint that allows AppView to sync existing stats to holds.
385385+// No authentication required - this endpoint will be removed after migration is complete.
386386+// TODO: Remove this endpoint after stats migration is complete
385387func (h *XRPCHandler) HandleSetStats(w http.ResponseWriter, r *http.Request) {
386388 ctx := r.Context()
387389388388- // Validate service token (same auth as blob:write endpoints)
389389- validatedUser, err := pds.ValidateBlobWriteAccess(r, h.pds, h.httpClient)
390390- if err != nil {
391391- RespondError(w, http.StatusForbidden, fmt.Sprintf("authorization failed: %v", err))
392392- return
393393- }
394394-395390 // Parse request
396391 var req struct {
397392 OwnerDID string `json:"ownerDid"`
···404399405400 if err := DecodeJSON(r, &req); err != nil {
406401 RespondError(w, http.StatusBadRequest, err.Error())
407407- return
408408- }
409409-410410- // Verify user DID matches token (user can only set stats for their own repos)
411411- if req.OwnerDID != validatedUser.DID {
412412- RespondError(w, http.StatusForbidden, "owner DID mismatch")
413402 return
414403 }
415404