Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol. wisp.place

Don't check the DB for sites in the hot tier unless the request would 404

+91 -86
+77 -75
apps/hosting-service/src/lib/file-serving.ts
··· 20 20 import { normalizeFileCids } from '@wispplace/fs-utils'; 21 21 import { fetchAndCacheSite } from './on-demand-cache'; 22 22 import type { StorageResult } from '@wispplace/tiered-storage'; 23 + import { createLogger } from '@wispplace/observability'; 24 + 25 + const logger = createLogger('file-serving'); 23 26 24 27 type FileStorageResult = StorageResult<Uint8Array>; 25 28 ··· 34 37 if (result) { 35 38 const tier = result.source || 'unknown'; 36 39 const size = result.data ? (result.data as Uint8Array).length : 0; 37 - console.log(`[Storage] Served ${filePath} from ${tier} tier (${size} bytes) - ${did}:${rkey}`); 40 + logger.debug(`Served ${filePath} from ${tier} tier`, { did, rkey, size, tier }); 38 41 } 39 42 40 43 return result; ··· 136 139 applyCustomHeaders(headers, filePath, settings); 137 140 return new Response(decompressed, { headers }); 138 141 } 139 - console.warn(`File ${filePath} marked as gzipped but lacks magic bytes, serving as-is`); 142 + logger.warn(`File marked as gzipped but lacks magic bytes, serving as-is`, { filePath }); 140 143 applyCustomHeaders(headers, filePath, settings); 141 144 return new Response(content, { headers }); 142 145 } ··· 146 149 147 150 applyCustomHeaders(headers, filePath, settings); 148 151 return new Response(content, { headers }); 149 - } 150 - 151 - /** 152 - * Ensure a site is cached locally. If the site has no DB entry (completely unknown), 153 - * attempt to fetch and cache it on-demand from the PDS. 
154 - */ 155 - async function ensureSiteCached(did: string, rkey: string): Promise<void> { 156 - const existing = await getSiteCache(did, rkey); 157 - if (existing) { 158 - // Site is in DB — check if any files actually exist in storage 159 - const prefix = `${did}/${rkey}/`; 160 - const hasFiles = await storage.exists(prefix.slice(0, -1)) || 161 - await checkAnyFileExists(did, rkey, existing.file_cids); 162 - if (hasFiles) { 163 - return; 164 - } 165 - console.log(`[FileServing] Site ${did}/${rkey} in DB but no files in storage, re-fetching`); 166 - } else { 167 - console.log(`[FileServing] Site ${did}/${rkey} not in DB, attempting on-demand cache`); 168 - } 169 - 170 - const success = await fetchAndCacheSite(did, rkey); 171 - console.log(`[FileServing] On-demand cache for ${did}/${rkey}: ${success ? 'success' : 'failed'}`); 172 - } 173 - 174 - async function checkAnyFileExists(did: string, rkey: string, fileCids: unknown): Promise<boolean> { 175 - if (!fileCids || typeof fileCids !== 'object') return false; 176 - const cids = fileCids as Record<string, string>; 177 - const firstFile = Object.keys(cids)[0]; 178 - if (!firstFile) return false; 179 - return storage.exists(`${did}/${rkey}/${firstFile}`); 180 152 } 181 153 182 154 /** ··· 189 161 fullUrl?: string, 190 162 headers?: Record<string, string> 191 163 ): Promise<Response> { 192 - // Check if this site is completely unknown (not in DB, no files in storage) 193 - // If so, attempt to fetch and cache it on-demand from the PDS 194 - await ensureSiteCached(did, rkey); 195 - 196 164 // Load settings for this site 197 165 const settings = await getCachedSettings(did, rkey); 198 166 const indexFiles = getIndexFiles(settings); ··· 318 286 requestPath = requestPath.slice(0, -1); 319 287 } 320 288 321 - // Check if this path is a directory first (best-effort via prefix scan) 322 - const directoryEntries = await listDirectoryEntries(did, rkey, requestPath); 323 - if (directoryEntries.length > 0) { 324 - // It's a 
directory, try each index file in order 289 + // For directory-like paths (empty or no extension), try index files FIRST (fast) 290 + // Only do expensive directory listing if needed for directory listing feature 291 + if (!requestPath || !requestPath.includes('.')) { 325 292 for (const indexFile of indexFiles) { 326 293 const indexPath = requestPath ? `${requestPath}/${indexFile}` : indexFile; 327 - if (await storageExists(did, rkey, indexPath)) { 328 - return serveFileInternal(did, rkey, indexPath, settings, requestHeaders); 294 + const result = await getFileWithMetadata(did, rkey, indexPath); 295 + if (result) { 296 + return buildResponseFromStorageResult(result, indexPath, settings, requestHeaders); 329 297 } 330 298 await markExpectedMiss(indexPath); 331 299 } 332 - // No index file found - check if directory listing is enabled 300 + 301 + // Index not found - check if we need directory listing 333 302 if (settings?.directoryListing) { 334 - const missResponse = await maybeReturnStorageMiss(); 335 - if (missResponse) return missResponse; 336 - const html = generateDirectoryListing(requestPath, directoryEntries); 337 - return new Response(html, { 338 - headers: { 339 - 'Content-Type': 'text/html; charset=utf-8', 340 - 'Cache-Control': 'public, max-age=300', 341 - }, 342 - }); 303 + const directoryEntries = await listDirectoryEntries(did, rkey, requestPath); 304 + if (directoryEntries.length > 0) { 305 + const missResponse = await maybeReturnStorageMiss(); 306 + if (missResponse) return missResponse; 307 + const html = generateDirectoryListing(requestPath, directoryEntries); 308 + return new Response(html, { 309 + headers: { 310 + 'Content-Type': 'text/html; charset=utf-8', 311 + 'Cache-Control': 'public, max-age=300', 312 + }, 313 + }); 314 + } 343 315 } 344 - // Fall through to 404/SPA handling 316 + // Fall through to normal file serving / 404 handling 345 317 } 346 318 347 319 // Not a directory, try to serve as a file ··· 457 429 458 430 const missResponse 
= await maybeReturnStorageMiss(); 459 431 if (missResponse) return missResponse; 432 + 433 + // Last resort: if site not in DB at all, try on-demand fetch 434 + const fileCids = await getExpectedFileCids(); 435 + if (fileCids === null) { 436 + logger.info(`Site not found in DB, attempting on-demand fetch before 404`, { did, rkey }); 437 + const success = await fetchAndCacheSite(did, rkey); 438 + if (success) { 439 + // Retry serving the originally requested file 440 + const retryPath = filePath || indexFiles[0] || 'index.html'; 441 + const retryResult = await getFileWithMetadata(did, rkey, retryPath); 442 + if (retryResult) { 443 + return buildResponseFromStorageResult(retryResult, retryPath, settings, requestHeaders); 444 + } 445 + } 446 + } 460 447 461 448 // Default styled 404 page 462 449 const html = generate404Page(); ··· 480 467 fullUrl?: string, 481 468 headers?: Record<string, string> 482 469 ): Promise<Response> { 483 - // Check if this site is completely unknown (not in DB, no files in storage) 484 - // If so, attempt to fetch and cache it on-demand from the PDS 485 - await ensureSiteCached(did, rkey); 486 - 487 470 // Load settings for this site 488 471 const settings = await getCachedSettings(did, rkey); 489 472 const indexFiles = getIndexFiles(settings); ··· 615 598 requestPath = requestPath.slice(0, -1); 616 599 } 617 600 618 - // Check if this path is a directory first (best-effort via prefix scan) 619 - const directoryEntries = await listDirectoryEntries(did, rkey, requestPath); 620 - if (directoryEntries.length > 0) { 621 - // It's a directory, try each index file in order 601 + // For directory-like paths (empty or no extension), try index files FIRST (fast) 602 + // Only do expensive directory listing if needed for directory listing feature 603 + if (!requestPath || !requestPath.includes('.')) { 622 604 for (const indexFile of indexFiles) { 623 605 const indexPath = requestPath ? 
`${requestPath}/${indexFile}` : indexFile; 624 - if (await storageExists(did, rkey, indexPath)) { 625 - return serveFileInternalWithRewrite(did, rkey, indexPath, basePath, settings, requestHeaders); 606 + const fileResult = await getFileForRequest(did, rkey, indexPath, true); 607 + if (fileResult) { 608 + return buildResponseFromStorageResult(fileResult.result, indexPath, settings, requestHeaders); 626 609 } 627 610 await markExpectedMiss(indexPath); 628 611 } 629 - // No index file found - check if directory listing is enabled 612 + 613 + // Index not found - check if we need directory listing 630 614 if (settings?.directoryListing) { 631 - const missResponse = await maybeReturnStorageMiss(); 632 - if (missResponse) return missResponse; 633 - const html = generateDirectoryListing(requestPath, directoryEntries); 634 - return new Response(html, { 635 - headers: { 636 - 'Content-Type': 'text/html; charset=utf-8', 637 - 'Cache-Control': 'public, max-age=300', 638 - }, 639 - }); 615 + const directoryEntries = await listDirectoryEntries(did, rkey, requestPath); 616 + if (directoryEntries.length > 0) { 617 + const missResponse = await maybeReturnStorageMiss(); 618 + if (missResponse) return missResponse; 619 + const html = generateDirectoryListing(requestPath, directoryEntries); 620 + return new Response(html, { 621 + headers: { 622 + 'Content-Type': 'text/html; charset=utf-8', 623 + 'Cache-Control': 'public, max-age=300', 624 + }, 625 + }); 626 + } 640 627 } 641 - // Fall through to 404/SPA handling 628 + // Fall through to normal file serving / 404 handling 642 629 } 643 630 644 631 // Not a directory, try to serve as a file ··· 736 723 737 724 const missResponse = await maybeReturnStorageMiss(); 738 725 if (missResponse) return missResponse; 726 + 727 + // Last resort: if site not in DB at all, try on-demand fetch 728 + const fileCids = await getExpectedFileCids(); 729 + if (fileCids === null) { 730 + logger.info(`Site not found in DB, attempting on-demand fetch 
before 404`, { did, rkey }); 731 + const success = await fetchAndCacheSite(did, rkey); 732 + if (success) { 733 + // Retry serving the originally requested file 734 + const retryPath = filePath || indexFiles[0] || 'index.html'; 735 + const retryResult = await getFileWithMetadata(did, rkey, retryPath); 736 + if (retryResult) { 737 + return buildResponseFromStorageResult(retryResult, retryPath, settings, requestHeaders); 738 + } 739 + } 740 + } 739 741 740 742 // Default styled 404 page 741 743 const html = generate404Page();
+14 -11
apps/hosting-service/src/lib/on-demand-cache.ts
··· 20 20 import { upsertSiteCache, tryAcquireLock, releaseLock } from './db'; 21 21 import { enqueueRevalidate } from './revalidate-queue'; 22 22 import { gunzipSync } from 'zlib'; 23 + import { createLogger } from '@wispplace/observability'; 24 + 25 + const logger = createLogger('on-demand-cache'); 23 26 24 27 // Track in-flight fetches to avoid duplicate work 25 28 const inFlightFetches = new Map<string, Promise<boolean>>(); ··· 65 68 // Try to acquire a distributed lock 66 69 const acquired = await tryAcquireLock(lockKey); 67 70 if (!acquired) { 68 - console.log(`[OnDemandCache] Lock not acquired for ${did}/${rkey}, another instance is handling it`); 71 + logger.debug('Lock not acquired, another instance is handling it', { did, rkey }); 69 72 return false; 70 73 } 71 74 72 75 try { 73 - console.log(`[OnDemandCache] Fetching missing site ${did}/${rkey}`); 76 + logger.info('Fetching missing site', { did, rkey }); 74 77 75 78 // Fetch site record from PDS 76 79 const pdsEndpoint = await getPdsForDid(did); 77 80 if (!pdsEndpoint) { 78 - console.error(`[OnDemandCache] Could not resolve PDS for ${did}`); 81 + logger.error('Could not resolve PDS', { did }); 79 82 return false; 80 83 } 81 84 ··· 87 90 } catch (err) { 88 91 const msg = err instanceof Error ? 
err.message : String(err); 89 92 if (msg.includes('HTTP 404') || msg.includes('Not Found')) { 90 - console.log(`[OnDemandCache] Site record not found on PDS: ${did}/${rkey}`); 93 + logger.info('Site record not found on PDS', { did, rkey }); 91 94 } else { 92 - console.error(`[OnDemandCache] Failed to fetch site record: ${did}/${rkey}`, msg); 95 + logger.error('Failed to fetch site record', { did, rkey, error: msg }); 93 96 } 94 97 return false; 95 98 } ··· 98 101 const recordCid = data.cid || ''; 99 102 100 103 if (!record?.root?.entries) { 101 - console.error(`[OnDemandCache] Invalid record structure for ${did}/${rkey}`); 104 + logger.error('Invalid record structure', { did, rkey }); 102 105 return false; 103 106 } 104 107 ··· 108 111 // Validate limits 109 112 const fileCount = countFilesInDirectory(expandedRoot); 110 113 if (fileCount > MAX_FILE_COUNT) { 111 - console.error(`[OnDemandCache] Site exceeds file limit: ${fileCount} > ${MAX_FILE_COUNT}`); 114 + logger.error('Site exceeds file limit', { did, rkey, fileCount, maxFileCount: MAX_FILE_COUNT }); 112 115 return false; 113 116 } 114 117 ··· 135 138 downloaded++; 136 139 } else { 137 140 failed++; 138 - console.error(`[OnDemandCache] Failed to download blob:`, result.reason); 141 + logger.error('Failed to download blob', { did, rkey, error: result.reason }); 139 142 } 140 143 } 141 144 } 142 145 143 - console.log(`[OnDemandCache] Downloaded ${downloaded} files (${failed} failed) for ${did}/${rkey}`); 146 + logger.info('Downloaded files', { did, rkey, downloaded, failed }); 144 147 145 148 // Update DB with file CIDs so future storage misses can be detected 146 149 await upsertSiteCache(did, rkey, recordCid, fileCids); ··· 148 151 // Enqueue revalidate so firehose-service backfills S3 (cold tier) 149 152 await enqueueRevalidate(did, rkey, `on-demand-cache`); 150 153 151 - console.log(`[OnDemandCache] Successfully cached site ${did}/${rkey}`); 154 + logger.info('Successfully cached site', { did, rkey, 
downloaded }); 152 155 return downloaded > 0; 153 156 } catch (err) { 154 - console.error(`[OnDemandCache] Error caching site ${did}/${rkey}:`, err); 157 + logger.error('Error caching site', { did, rkey, error: err }); 155 158 return false; 156 159 } finally { 157 160 await releaseLock(lockKey);