···349349- Implements `distribution.Repository` interface
350350- Uses RegistryContext to pass DID, PDS endpoint, hold DID, OAuth refresher, etc.
351351352352-**hold_cache.go**: In-memory hold DID cache
353353-- Caches `(DID, repository) → holdDid` for pull operations
354354-- TTL: 10 minutes (covers typical pull operations)
355355-- Cleanup: Background goroutine runs every 5 minutes
356356-- **NOTE:** Simple in-memory cache for MVP. For production: use Redis or similar
357357-- Prevents expensive PDS manifest lookups on every blob request during pull
352352+**Database-based hold DID lookups**:
353353+- Queries SQLite `manifests` table for hold DID (indexed, fast)
354354+- No in-memory caching needed - database IS the cache
355355+- Persistent across restarts, multi-instance safe
356356+- Pull operations use hold DID from latest manifest (historical reference)
357357+- Push operations use fresh discovery from profile/default
358358+- Function: `GetLatestHoldDIDForRepo(db, did, repository)` in `pkg/appview/db/queries.go` (also exposed as the `MetricsDB.GetLatestHoldDIDForRepo(did, repository)` method)
358359359360**proxy_blob_store.go**: External storage proxy (routes to hold via XRPC)
360361- Resolves hold DID → HTTP URL for XRPC requests (did:web resolution)
···604605605606**General:**
606607- Middleware is in `pkg/appview/middleware/` (auth.go, registry.go)
607607-- Storage routing is in `pkg/appview/storage/` (routing_repository.go, proxy_blob_store.go, hold_cache.go)
608608+- Storage routing is in `pkg/appview/storage/` (routing_repository.go, proxy_blob_store.go)
609609+- Hold DID lookups use database queries (no in-memory caching)
608610- Storage drivers imported as `_ "github.com/distribution/distribution/v3/registry/storage/driver/s3-aws"`
609611- Hold service reuses distribution's driver factory for multi-backend support
610612
+29
pkg/appview/db/queries.go
···724724 return &m, nil
725725}
// GetLatestHoldDIDForRepo returns the hold DID recorded on the most recent
// manifest (ordered by created_at) for the given DID and repository.
//
// It returns an empty string and a nil error when there is no usable value:
// either the repository has no manifests yet (e.g. first push) or the latest
// manifest's hold_endpoint column is NULL. It is used instead of the in-memory
// cache to determine which hold to use for blob operations. Any other query or
// scan failure is returned unchanged with an empty string.
//
// NOTE(review): the column is named hold_endpoint but its value is treated as
// a hold DID here - confirm the column actually stores DIDs.
func GetLatestHoldDIDForRepo(db *sql.DB, did, repository string) (string, error) {
	// NullString keeps a NULL hold_endpoint from surfacing as a scan error;
	// its String field stays "" in that case, which is the "no hold" result.
	var holdDID sql.NullString
	err := db.QueryRow(`
		SELECT hold_endpoint
		FROM manifests
		WHERE did = ? AND repository = ?
		ORDER BY created_at DESC
		LIMIT 1
	`, did, repository).Scan(&holdDID)

	switch {
	case err == sql.ErrNoRows:
		// No manifests yet - return empty string (first push case).
		return "", nil
	case err != nil:
		return "", err
	}

	return holdDID.String, nil
}
750750+727751// GetRepositoriesForDID returns all unique repository names for a DID
728752// Used by backfill to reconcile annotations for all repositories
729753func GetRepositoriesForDID(db *sql.DB, did string) ([]string, error) {
···15741598// IncrementPushCount increments the push count for a repository
15751599func (m *MetricsDB) IncrementPushCount(did, repository string) error {
15761600 return IncrementPushCount(m.db, did, repository)
16011601+}
16021602+16031603+// GetLatestHoldDIDForRepo returns the hold DID from the most recent manifest for a repository
16041604+func (m *MetricsDB) GetLatestHoldDIDForRepo(did, repository string) (string, error) {
16051605+ return GetLatestHoldDIDForRepo(m.db, did, repository)
15771606}
1578160715791608// GetFeaturedRepositories fetches top repositories sorted by stars and pulls
+2-1
pkg/appview/storage/context.go
···88 "atcr.io/pkg/auth/oauth"
99)
10101111-// DatabaseMetrics interface for tracking pull/push counts
// DatabaseMetrics interface for tracking pull/push counts and querying hold DIDs.
// It is the database surface the storage layer depends on.
type DatabaseMetrics interface {
	// IncrementPullCount records a pull for the given DID and repository.
	IncrementPullCount(did, repository string) error
	// IncrementPushCount records a push for the given DID and repository.
	IncrementPushCount(did, repository string) error
	// GetLatestHoldDIDForRepo returns the hold DID from the most recent
	// manifest for the repository. An empty string with a nil error means no
	// manifests exist yet (e.g., first push).
	GetLatestHoldDIDForRepo(did, repository string) (string, error)
}
16171718// ReadmeCache interface for README content caching
···11-package storage
22-33-import (
44- "sync"
55- "time"
66-)
77-88-// HoldCache caches hold DIDs for (DID, repository) pairs
99-// This avoids expensive ATProto lookups on every blob request during pulls
1010-//
1111-// NOTE: This is a simple in-memory cache for MVP. For production deployments:
1212-// - Use Redis or similar for distributed caching
1313-// - Consider implementing cache size limits
1414-// - Monitor memory usage under high load
1515-type HoldCache struct {
1616- mu sync.RWMutex
1717- cache map[string]*holdCacheEntry
1818-}
1919-2020-type holdCacheEntry struct {
2121- holdDID string
2222- expiresAt time.Time
2323-}
2424-2525-var globalHoldCache = &HoldCache{
2626- cache: make(map[string]*holdCacheEntry),
2727-}
2828-2929-func init() {
3030- // Start background cleanup goroutine
3131- go func() {
3232- ticker := time.NewTicker(5 * time.Minute)
3333- defer ticker.Stop()
3434- for range ticker.C {
3535- globalHoldCache.Cleanup()
3636- }
3737- }()
3838-}
3939-4040-// GetGlobalHoldCache returns the global hold cache instance
4141-func GetGlobalHoldCache() *HoldCache {
4242- return globalHoldCache
4343-}
4444-4545-// Set stores a hold DID for a (DID, repository) pair with a TTL
4646-func (c *HoldCache) Set(did, repository, holdDID string, ttl time.Duration) {
4747- c.mu.Lock()
4848- defer c.mu.Unlock()
4949-5050- key := did + ":" + repository
5151- c.cache[key] = &holdCacheEntry{
5252- holdDID: holdDID,
5353- expiresAt: time.Now().Add(ttl),
5454- }
5555-}
5656-5757-// Get retrieves a hold DID for a (DID, repository) pair
5858-// Returns empty string and false if not found or expired
5959-func (c *HoldCache) Get(did, repository string) (string, bool) {
6060- c.mu.RLock()
6161- defer c.mu.RUnlock()
6262-6363- key := did + ":" + repository
6464- entry, ok := c.cache[key]
6565- if !ok {
6666- return "", false
6767- }
6868-6969- // Check if expired
7070- if time.Now().After(entry.expiresAt) {
7171- // Don't delete here (would need write lock), let cleanup handle it
7272- return "", false
7373- }
7474-7575- return entry.holdDID, true
7676-}
7777-7878-// Cleanup removes expired entries (called automatically every 5 minutes)
7979-func (c *HoldCache) Cleanup() {
8080- c.mu.Lock()
8181- defer c.mu.Unlock()
8282-8383- now := time.Now()
8484- removed := 0
8585- for key, entry := range c.cache {
8686- if now.After(entry.expiresAt) {
8787- delete(c.cache, key)
8888- removed++
8989- }
9090- }
9191-9292- // Log cleanup stats for monitoring
9393- if removed > 0 || len(c.cache) > 100 {
9494- // Log if we removed entries OR if cache is growing large
9595- // This helps identify if cache size is becoming a concern
9696- println("Hold cache cleanup: removed", removed, "entries, remaining", len(c.cache))
9797- }
9898-}
-150
pkg/appview/storage/hold_cache_test.go
···11-package storage
22-33-import (
44- "testing"
55- "time"
66-)
77-88-func TestHoldCache_SetAndGet(t *testing.T) {
99- cache := &HoldCache{
1010- cache: make(map[string]*holdCacheEntry),
1111- }
1212-1313- did := "did:plc:test123"
1414- repo := "myapp"
1515- holdDID := "did:web:hold01.atcr.io"
1616- ttl := 10 * time.Minute
1717-1818- // Set a value
1919- cache.Set(did, repo, holdDID, ttl)
2020-2121- // Get the value - should succeed
2222- gotHoldDID, ok := cache.Get(did, repo)
2323- if !ok {
2424- t.Fatal("Expected Get to return true, got false")
2525- }
2626- if gotHoldDID != holdDID {
2727- t.Errorf("Expected hold DID %q, got %q", holdDID, gotHoldDID)
2828- }
2929-}
3030-3131-func TestHoldCache_GetNonExistent(t *testing.T) {
3232- cache := &HoldCache{
3333- cache: make(map[string]*holdCacheEntry),
3434- }
3535-3636- // Get non-existent value
3737- _, ok := cache.Get("did:plc:nonexistent", "repo")
3838- if ok {
3939- t.Error("Expected Get to return false for non-existent key")
4040- }
4141-}
4242-4343-func TestHoldCache_ExpiredEntry(t *testing.T) {
4444- cache := &HoldCache{
4545- cache: make(map[string]*holdCacheEntry),
4646- }
4747-4848- did := "did:plc:test123"
4949- repo := "myapp"
5050- holdDID := "did:web:hold01.atcr.io"
5151-5252- // Set with very short TTL
5353- cache.Set(did, repo, holdDID, 10*time.Millisecond)
5454-5555- // Wait for expiration
5656- time.Sleep(20 * time.Millisecond)
5757-5858- // Get should return false
5959- _, ok := cache.Get(did, repo)
6060- if ok {
6161- t.Error("Expected Get to return false for expired entry")
6262- }
6363-}
6464-6565-func TestHoldCache_Cleanup(t *testing.T) {
6666- cache := &HoldCache{
6767- cache: make(map[string]*holdCacheEntry),
6868- }
6969-7070- // Add multiple entries with different TTLs
7171- cache.Set("did:plc:1", "repo1", "hold1", 10*time.Millisecond)
7272- cache.Set("did:plc:2", "repo2", "hold2", 1*time.Hour)
7373- cache.Set("did:plc:3", "repo3", "hold3", 10*time.Millisecond)
7474-7575- // Wait for some to expire
7676- time.Sleep(20 * time.Millisecond)
7777-7878- // Run cleanup
7979- cache.Cleanup()
8080-8181- // Verify expired entries are removed
8282- if _, ok := cache.Get("did:plc:1", "repo1"); ok {
8383- t.Error("Expected expired entry 1 to be removed")
8484- }
8585- if _, ok := cache.Get("did:plc:3", "repo3"); ok {
8686- t.Error("Expected expired entry 3 to be removed")
8787- }
8888-8989- // Verify non-expired entry remains
9090- if _, ok := cache.Get("did:plc:2", "repo2"); !ok {
9191- t.Error("Expected non-expired entry to remain")
9292- }
9393-}
9494-9595-func TestHoldCache_ConcurrentAccess(t *testing.T) {
9696- cache := &HoldCache{
9797- cache: make(map[string]*holdCacheEntry),
9898- }
9999-100100- done := make(chan bool)
101101-102102- // Concurrent writes
103103- for i := 0; i < 10; i++ {
104104- go func(id int) {
105105- did := "did:plc:concurrent"
106106- repo := "repo" + string(rune(id))
107107- holdDID := "hold" + string(rune(id))
108108- cache.Set(did, repo, holdDID, 1*time.Minute)
109109- done <- true
110110- }(i)
111111- }
112112-113113- // Concurrent reads
114114- for i := 0; i < 10; i++ {
115115- go func(id int) {
116116- repo := "repo" + string(rune(id))
117117- cache.Get("did:plc:concurrent", repo)
118118- done <- true
119119- }(i)
120120- }
121121-122122- // Wait for all goroutines
123123- for i := 0; i < 20; i++ {
124124- <-done
125125- }
126126-}
127127-128128-func TestHoldCache_KeyFormat(t *testing.T) {
129129- cache := &HoldCache{
130130- cache: make(map[string]*holdCacheEntry),
131131- }
132132-133133- did := "did:plc:test"
134134- repo := "myrepo"
135135- holdDID := "did:web:hold"
136136-137137- cache.Set(did, repo, holdDID, 1*time.Minute)
138138-139139- // Verify the key is stored correctly (did:repo)
140140- expectedKey := did + ":" + repo
141141- if _, exists := cache.cache[expectedKey]; !exists {
142142- t.Errorf("Expected key %q to exist in cache", expectedKey)
143143- }
144144-}
145145-146146-// TODO: Add more comprehensive tests:
147147-// - Test GetGlobalHoldCache()
148148-// - Test cache size monitoring
149149-// - Benchmark cache performance under load
150150-// - Test cleanup goroutine timing
+18-22
pkg/appview/storage/routing_repository.go
···11// Package storage implements the storage routing layer for AppView.
22// It routes manifests to ATProto PDS (as io.atcr.manifest records) and
33-// blobs to hold services via XRPC, with hold DID caching for efficient pulls.
33+// blobs to hold services via XRPC, with database-based hold DID lookups.
44// All storage operations are proxied - AppView stores nothing locally.
55package storage
66···88 "context"
99 "log/slog"
1010 "sync"
1111- "time"
12111312 "github.com/distribution/distribution/v3"
1413)
···5049 manifestStore := r.manifestStore
5150 r.mu.Unlock()
52515353- // After any manifest operation, cache the hold DID for blob fetches
5454- // We use a goroutine to avoid blocking, and check after a short delay to allow the operation to complete
5555- go func() {
5656- time.Sleep(100 * time.Millisecond) // Brief delay to let manifest fetch complete
5757- if holdDID := manifestStore.GetLastFetchedHoldDID(); holdDID != "" {
5858- // Cache for 10 minutes - should cover typical pull operations
5959- GetGlobalHoldCache().Set(r.Ctx.DID, r.Ctx.Repository, holdDID, 10*time.Minute)
6060- slog.Debug("Cached hold DID", "component", "storage/routing", "did", r.Ctx.DID, "repo", r.Ctx.Repository, "hold", holdDID)
6161- }
6262- }()
6363-6452 return manifestStore, nil
6553}
6654···7664 return blobStore
7765 }
78667979- // For pull operations, check if we have a cached hold DID from a recent manifest fetch
6767+ // For pull operations, check database for hold DID from the most recent manifest
8068 // This ensures blobs are fetched from the hold recorded in the manifest, not re-discovered
8169 holdDID := r.Ctx.HoldDID // Default to discovery-based DID
7070+ holdSource := "discovery"
82718383- if cachedHoldDID, ok := GetGlobalHoldCache().Get(r.Ctx.DID, r.Ctx.Repository); ok {
8484- // Use cached hold DID from manifest
8585- holdDID = cachedHoldDID
8686- slog.Debug("Using cached hold from manifest", "component", "storage/blobs", "did", r.Ctx.DID, "repo", r.Ctx.Repository, "hold", cachedHoldDID)
8787- } else {
8888- // No cached hold, use discovery-based DID (for push or first pull)
8989- slog.Debug("Using discovery-based hold", "component", "storage/blobs", "did", r.Ctx.DID, "repo", r.Ctx.Repository, "hold", holdDID)
7272+ if r.Ctx.Database != nil {
7373+ // Query database for the latest manifest's hold DID
7474+ if dbHoldDID, err := r.Ctx.Database.GetLatestHoldDIDForRepo(r.Ctx.DID, r.Ctx.Repository); err == nil && dbHoldDID != "" {
7575+ // Use hold DID from database (pull case - use historical reference)
7676+ holdDID = dbHoldDID
7777+ holdSource = "database"
7878+ slog.Debug("Using hold from database manifest", "component", "storage/blobs", "did", r.Ctx.DID, "repo", r.Ctx.Repository, "hold", dbHoldDID)
7979+ } else if err != nil {
8080+ // Log error but don't fail - fall back to discovery-based DID
8181+ slog.Warn("Failed to query database for hold DID", "component", "storage/blobs", "error", err)
8282+ }
8383+ // If dbHoldDID is empty (no manifests yet), fall through to use discovery-based DID
9084 }
91859286 if holdDID == "" {
···9488 panic("hold DID not set in RegistryContext - ensure default_hold_did is configured in middleware")
9589 }
96909797- // Update context with the correct hold DID (may be cached or discovered)
9191+ slog.Debug("Using hold DID for blobs", "component", "storage/blobs", "did", r.Ctx.DID, "repo", r.Ctx.Repository, "hold", holdDID, "source", holdSource)
9292+9393+ // Update context with the correct hold DID (may be from database or discovered)
9894 r.Ctx.HoldDID = holdDID
999510096 // Create and cache proxy blob store
+61-26
pkg/appview/storage/routing_repository_test.go
···44 "context"
55 "sync"
66 "testing"
77- "time"
8798 "github.com/distribution/distribution/v3"
109 "github.com/stretchr/testify/assert"
···12111312 "atcr.io/pkg/atproto"
1413)
// mockDatabase is a canned stand-in for the database dependency used by the
// routing repository tests. The counters are no-ops; the hold DID lookup
// replays whatever the test configured.
type mockDatabase struct {
	holdDID string // value returned by GetLatestHoldDIDForRepo when err is nil
	err     error  // when non-nil, returned by GetLatestHoldDIDForRepo instead of holdDID
}

// IncrementPullCount ignores its arguments and always succeeds.
func (d *mockDatabase) IncrementPullCount(did, repository string) error { return nil }

// IncrementPushCount ignores its arguments and always succeeds.
func (d *mockDatabase) IncrementPushCount(did, repository string) error { return nil }

// GetLatestHoldDIDForRepo returns the configured hold DID, or an empty string
// together with the configured error when one is set.
func (d *mockDatabase) GetLatestHoldDIDForRepo(did, repository string) (string, error) {
	if d.err == nil {
		return d.holdDID, nil
	}
	return "", d.err
}
15351636func TestNewRoutingRepository(t *testing.T) {
1737 ctx := &RegistryContext{
···89109 assert.NotNil(t, repo.manifestStore)
90110}
911119292-// TestRoutingRepository_Blobs_WithCache tests blob store with cached hold DID
9393-func TestRoutingRepository_Blobs_WithCache(t *testing.T) {
9494- // Pre-populate the hold cache
9595- cache := GetGlobalHoldCache()
9696- cachedHoldDID := "did:web:cached.hold.io"
9797- cache.Set("did:plc:test123", "myapp", cachedHoldDID, 10*time.Minute)
112112+// TestRoutingRepository_Blobs_WithDatabase tests blob store with database hold DID
113113+func TestRoutingRepository_Blobs_WithDatabase(t *testing.T) {
114114+ dbHoldDID := "did:web:database.hold.io"
9811599116 ctx := &RegistryContext{
100117 DID: "did:plc:test123",
101118 Repository: "myapp",
102119 HoldDID: "did:web:default.hold.io", // Discovery-based hold (should be overridden)
103120 ATProtoClient: atproto.NewClient("https://pds.example.com", "did:plc:test123", ""),
121121+ Database: &mockDatabase{holdDID: dbHoldDID},
104122 }
105123106124 repo := NewRoutingRepository(nil, ctx)
107125 blobStore := repo.Blobs(context.Background())
108126109127 assert.NotNil(t, blobStore)
110110- // Verify the hold DID was updated to use the cached value
111111- assert.Equal(t, cachedHoldDID, repo.Ctx.HoldDID, "should use cached hold DID")
128128+ // Verify the hold DID was updated to use the database value
129129+ assert.Equal(t, dbHoldDID, repo.Ctx.HoldDID, "should use database hold DID")
112130}
113131114114-// TestRoutingRepository_Blobs_WithoutCache tests blob store with discovery-based hold
115115-func TestRoutingRepository_Blobs_WithoutCache(t *testing.T) {
132132+// TestRoutingRepository_Blobs_WithoutDatabase tests blob store with discovery-based hold
133133+func TestRoutingRepository_Blobs_WithoutDatabase(t *testing.T) {
116134 discoveryHoldDID := "did:web:discovery.hold.io"
117135118118- // Use a different DID/repo to avoid cache contamination from other tests
119136 ctx := &RegistryContext{
120137 DID: "did:plc:nocache456",
121138 Repository: "uncached-app",
122139 HoldDID: discoveryHoldDID,
123140 ATProtoClient: atproto.NewClient("https://pds.example.com", "did:plc:nocache456", ""),
141141+ Database: nil, // No database
124142 }
125143126144 repo := NewRoutingRepository(nil, ctx)
···131149 assert.Equal(t, discoveryHoldDID, repo.Ctx.HoldDID, "should use discovery-based hold DID")
132150}
133151152152+// TestRoutingRepository_Blobs_DatabaseEmptyFallback tests fallback when database returns empty hold DID
153153+func TestRoutingRepository_Blobs_DatabaseEmptyFallback(t *testing.T) {
154154+ discoveryHoldDID := "did:web:discovery.hold.io"
155155+156156+ ctx := &RegistryContext{
157157+ DID: "did:plc:test123",
158158+ Repository: "newapp",
159159+ HoldDID: discoveryHoldDID,
160160+ ATProtoClient: atproto.NewClient("https://pds.example.com", "did:plc:test123", ""),
161161+ Database: &mockDatabase{holdDID: ""}, // Empty string (no manifests yet)
162162+ }
163163+164164+ repo := NewRoutingRepository(nil, ctx)
165165+ blobStore := repo.Blobs(context.Background())
166166+167167+ assert.NotNil(t, blobStore)
168168+ // Verify the hold DID falls back to discovery-based
169169+ assert.Equal(t, discoveryHoldDID, repo.Ctx.HoldDID, "should fall back to discovery-based hold DID when database returns empty")
170170+}
171171+134172// TestRoutingRepository_BlobStoreCaching tests that blob store is cached
135173func TestRoutingRepository_BlobStoreCaching(t *testing.T) {
136174 ctx := &RegistryContext{
···254292 assert.NotNil(t, cachedBlobStore)
255293}
256294257257-// TestRoutingRepository_HoldCachePopulation tests that hold DID cache is populated after manifest fetch
258258-// Note: This test verifies the goroutine behavior with a delay
259259-func TestRoutingRepository_HoldCachePopulation(t *testing.T) {
295295+// TestRoutingRepository_Blobs_Priority tests that database hold DID takes priority over discovery
296296+func TestRoutingRepository_Blobs_Priority(t *testing.T) {
297297+ dbHoldDID := "did:web:database.hold.io"
298298+ discoveryHoldDID := "did:web:discovery.hold.io"
299299+260300 ctx := &RegistryContext{
261301 DID: "did:plc:test123",
262302 Repository: "myapp",
263263- HoldDID: "did:web:hold01.atcr.io",
303303+ HoldDID: discoveryHoldDID, // Discovery-based hold
264304 ATProtoClient: atproto.NewClient("https://pds.example.com", "did:plc:test123", ""),
305305+ Database: &mockDatabase{holdDID: dbHoldDID}, // Database has a different hold DID
265306 }
266307267308 repo := NewRoutingRepository(nil, ctx)
309309+ blobStore := repo.Blobs(context.Background())
268310269269- // Create manifest store (which triggers the cache population goroutine)
270270- _, err := repo.Manifests(context.Background())
271271- require.NoError(t, err)
272272-273273- // Wait for goroutine to complete (it has a 100ms sleep)
274274- time.Sleep(200 * time.Millisecond)
275275-276276- // Note: We can't easily verify the cache was populated without a real manifest fetch
277277- // The actual caching happens in GetLastFetchedHoldDID() which requires manifest operations
278278- // This test primarily verifies the Manifests() call doesn't panic with the goroutine
311311+ assert.NotNil(t, blobStore)
312312+ // Database hold DID should take priority over discovery
313313+ assert.Equal(t, dbHoldDID, repo.Ctx.HoldDID, "database hold DID should take priority over discovery")
279314}