Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.

refactor hosting/firehose pipeline

- make hosting-service read-only and remove firehose/backfill
- add Redis Streams revalidation + metrics
- fix settings cache schema (did+rkey) and backfill flow
- normalize file_cids handling shared via fs-utils
- fix gzip handling and HTML rewrite pipeline

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: gemini-cli <218195315+gemini-cli@users.noreply.github.com>
Co-authored-by: Linus Torvalds <torvalds@linux-foundation.org>

+926 -1910
+4 -2
apps/firehose-service/package.json
··· 4 4 "type": "module", 5 5 "scripts": { 6 6 "dev": "NODE_OPTIONS='--max-old-space-size=2048' tsx --env-file=.env src/index.ts", 7 + "dev:backfill": "NODE_OPTIONS='--max-old-space-size=2048' tsx --env-file=.env src/index.ts --backfill", 7 8 "build": "bun run build.ts", 8 9 "start": "NODE_OPTIONS='--max-old-space-size=2048' tsx src/index.ts", 9 10 "check": "tsc --noEmit", ··· 20 21 "@wispplace/fs-utils": "workspace:*", 21 22 "@wispplace/lexicons": "workspace:*", 22 23 "@wispplace/safe-fetch": "workspace:*", 24 + "@wispplace/tiered-storage": "workspace:*", 23 25 "hono": "^4.10.4", 26 + "ioredis": "^5.9.2", 24 27 "multiformats": "^13.4.1", 25 - "postgres": "^3.4.5", 26 - "@wispplace/tiered-storage": "workspace:*" 28 + "postgres": "^3.4.5" 27 29 }, 28 30 "devDependencies": { 29 31 "@types/node": "^22.10.5",
+5
apps/firehose-service/src/config.ts
··· 23 23 // Health check server 24 24 healthPort: parseInt(process.env.HEALTH_PORT || '3001', 10), 25 25 26 + // Redis revalidation queue 27 + redisUrl: process.env.REDIS_URL, 28 + revalidateStream: process.env.WISP_REVALIDATE_STREAM || 'wisp:revalidate', 29 + revalidateGroup: process.env.WISP_REVALIDATE_GROUP || 'firehose-service', 30 + 26 31 // Mode 27 32 isBackfill: process.argv.includes('--backfill'), 28 33 } as const;
+29 -5
apps/firehose-service/src/index.ts
··· 10 10 import { serve } from '@hono/node-server'; 11 11 import { config } from './config'; 12 12 import { startFirehose, stopFirehose, getFirehoseHealth } from './lib/firehose'; 13 - import { closeDatabase, listAllSiteCaches, getSiteCache } from './lib/db'; 13 + import { closeDatabase, listAllSiteCaches, listAllSites, getSiteCache } from './lib/db'; 14 14 import { storage } from './lib/storage'; 15 15 import { handleSiteCreateOrUpdate, fetchSiteRecord } from './lib/cache-writer'; 16 + import { startRevalidateWorker, stopRevalidateWorker } from './lib/revalidate-worker'; 16 17 17 18 const app = new Hono(); 18 19 ··· 39 40 console.log(`\n[Service] Received ${signal}, shutting down...`); 40 41 41 42 stopFirehose(); 43 + await stopRevalidateWorker(); 42 44 await closeDatabase(); 43 45 44 46 console.log('[Service] Shutdown complete'); ··· 53 55 */ 54 56 async function runBackfill(): Promise<void> { 55 57 console.log('[Backfill] Starting backfill mode'); 58 + const startTime = Date.now(); 59 + const forceRewriteHtml = process.env.BACKFILL_FORCE_REWRITE_HTML === 'true'; 56 60 57 - const sites = await listAllSiteCaches(); 61 + if (forceRewriteHtml) { 62 + console.log('[Backfill] Forcing HTML rewrite for all sites'); 63 + } 64 + 65 + let sites = await listAllSites(); 66 + if (sites.length === 0) { 67 + const cachedSites = await listAllSiteCaches(); 68 + sites = cachedSites.map(site => ({ did: site.did, rkey: site.rkey })); 69 + console.log('[Backfill] Sites table empty; falling back to site_cache entries'); 70 + } 71 + 58 72 console.log(`[Backfill] Found ${sites.length} sites in database`); 59 73 60 74 let processed = 0; ··· 72 86 continue; 73 87 } 74 88 89 + const existingCache = await getSiteCache(site.did, site.rkey); 75 90 // Check if CID matches (already up to date) 76 - if (result.cid === site.record_cid) { 91 + if (!forceRewriteHtml && existingCache && result.cid === existingCache.record_cid) { 77 92 console.log(`[Backfill] Site already up to date: 
${site.did}/${site.rkey}`); 78 93 skipped++; 79 94 continue; 80 95 } 81 96 82 97 // Process the site 83 - await handleSiteCreateOrUpdate(site.did, site.rkey, result.record, result.cid); 98 + await handleSiteCreateOrUpdate(site.did, site.rkey, result.record, result.cid, { 99 + forceRewriteHtml, 100 + }); 84 101 processed++; 85 102 86 103 console.log(`[Backfill] Progress: ${processed + skipped + failed}/${sites.length}`); ··· 90 107 } 91 108 } 92 109 93 - console.log(`[Backfill] Complete: ${processed} processed, ${skipped} skipped, ${failed} failed`); 110 + const elapsedMs = Date.now() - startTime; 111 + const elapsedSec = Math.round(elapsedMs / 1000); 112 + const elapsedMin = Math.floor(elapsedSec / 60); 113 + const elapsedRemSec = elapsedSec % 60; 114 + const elapsedLabel = elapsedMin > 0 ? `${elapsedMin}m ${elapsedRemSec}s` : `${elapsedSec}s`; 115 + 116 + console.log(`[Backfill] Complete: ${processed} processed, ${skipped} skipped, ${failed} failed (${elapsedLabel} elapsed)`); 94 117 } 95 118 96 119 // Main entry point ··· 115 138 } else { 116 139 // Start firehose 117 140 startFirehose(); 141 + await startRevalidateWorker(); 118 142 } 119 143 } 120 144
+100 -47
apps/firehose-service/src/lib/cache-writer.ts
··· 8 8 import type { Record as WispSettings } from '@wispplace/lexicons/types/place/wisp/settings'; 9 9 import { safeFetchJson, safeFetchBlob } from '@wispplace/safe-fetch'; 10 10 import { extractBlobCid, getPdsForDid } from '@wispplace/atproto-utils'; 11 - import { collectFileCidsFromEntries, countFilesInDirectory } from '@wispplace/fs-utils'; 11 + import { collectFileCidsFromEntries, countFilesInDirectory, normalizeFileCids } from '@wispplace/fs-utils'; 12 12 import { shouldCompressMimeType } from '@wispplace/atproto-utils/compression'; 13 13 import { MAX_BLOB_SIZE, MAX_FILE_COUNT, MAX_SITE_SIZE } from '@wispplace/constants'; 14 14 import { writeFile, deleteFile, listFiles } from './storage'; ··· 244 244 base64?: boolean; 245 245 } 246 246 247 - type FileCidsNormalization = { 248 - value: Record<string, string>; 249 - source: 'object' | 'array' | 'string-json' | 'string-invalid' | 'null' | 'other'; 250 - }; 247 + function isTextLikeMime(mimeType?: string, path?: string): boolean { 248 + if (mimeType) { 249 + if (mimeType === 'text/html') return true; 250 + if (mimeType === 'text/css') return true; 251 + if (mimeType === 'text/javascript') return true; 252 + if (mimeType === 'application/javascript') return true; 253 + if (mimeType === 'application/json') return true; 254 + if (mimeType === 'application/xml') return true; 255 + if (mimeType === 'image/svg+xml') return true; 256 + } 251 257 252 - function normalizeFileCids(value: unknown): FileCidsNormalization { 253 - if (value == null) return { value: {}, source: 'null' }; 258 + if (!path) return false; 259 + const lower = path.toLowerCase(); 260 + return lower.endsWith('.html') || 261 + lower.endsWith('.htm') || 262 + lower.endsWith('.css') || 263 + lower.endsWith('.js') || 264 + lower.endsWith('.json') || 265 + lower.endsWith('.xml') || 266 + lower.endsWith('.svg'); 267 + } 254 268 255 - if (typeof value === 'string') { 256 - try { 257 - const parsed = JSON.parse(value) as unknown; 258 - if 
(Array.isArray(parsed)) { 259 - const normalized = normalizeFileCids(parsed); 260 - return { value: normalized.value, source: 'string-json' }; 261 - } 262 - if (parsed && typeof parsed === 'object') { 263 - return { value: parsed as Record<string, string>, source: 'string-json' }; 264 - } 265 - } catch { 266 - // fall through to invalid 269 + function looksLikeBase64(content: Uint8Array): boolean { 270 + if (content.length === 0) return false; 271 + let nonWhitespace = 0; 272 + for (const byte of content) { 273 + const char = byte; 274 + if (char === 0x0a || char === 0x0d || char === 0x20 || char === 0x09) { 275 + continue; 267 276 } 268 - return { value: {}, source: 'string-invalid' }; 277 + nonWhitespace++; 278 + const isBase64Char = 279 + (char >= 0x41 && char <= 0x5a) || // A-Z 280 + (char >= 0x61 && char <= 0x7a) || // a-z 281 + (char >= 0x30 && char <= 0x39) || // 0-9 282 + char === 0x2b || // + 283 + char === 0x2f || // / 284 + char === 0x3d; // = 285 + if (!isBase64Char) return false; 269 286 } 270 287 271 - if (Array.isArray(value)) { 272 - const result: Record<string, string> = {}; 273 - for (const item of value) { 274 - if (item && typeof item === 'object' && 'path' in item && 'cid' in item) { 275 - const path = (item as any).path; 276 - const cid = (item as any).cid; 277 - if (typeof path === 'string' && typeof cid === 'string') { 278 - result[path] = cid; 279 - } 280 - } 281 - } 282 - return { value: result, source: 'array' }; 283 - } 288 + // Base64 length should be divisible by 4 (ignoring whitespace) 289 + return nonWhitespace % 4 === 0; 290 + } 284 291 285 - if (typeof value === 'object') { 286 - return { value: value as Record<string, string>, source: 'object' }; 292 + function tryDecodeBase64(content: Uint8Array): Uint8Array | null { 293 + if (!looksLikeBase64(content)) return null; 294 + const base64String = new TextDecoder().decode(content).replace(/\s+/g, ''); 295 + try { 296 + return Buffer.from(base64String, 'base64'); 297 + } catch { 298 + 
return null; 287 299 } 288 - 289 - return { value: {}, source: 'other' }; 290 300 } 291 301 292 302 /** ··· 334 344 console.log(`[Cache] Downloading ${file.path}`); 335 345 336 346 let content = await safeFetchBlob(blobUrl, { maxSize: MAX_BLOB_SIZE, timeout: 300000 }); 347 + let encoding = file.encoding; 337 348 338 349 // Decode base64 if needed 339 350 if (file.base64) { 340 351 const textDecoder = new TextDecoder(); 341 352 const base64String = textDecoder.decode(content); 342 353 content = Buffer.from(base64String, 'base64'); 354 + } else if (isTextLikeMime(file.mimeType, file.path)) { 355 + // Heuristic fallback: some records omit base64 flag but content is base64 text 356 + const decoded = tryDecodeBase64(content); 357 + if (decoded) { 358 + console.warn(`[Cache] Decoded base64 fallback for ${file.path} (base64 flag missing)`); 359 + content = decoded; 360 + } 343 361 } 344 362 345 363 // Decompress if needed and shouldn't stay compressed 346 364 const shouldStayCompressed = shouldCompressMimeType(file.mimeType); 347 - let encoding = file.encoding; 348 365 349 366 if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 && 350 367 content[0] === 0x1f && content[1] === 0x8b) { ··· 354 371 } catch (error) { 355 372 console.error(`[Cache] Failed to decompress ${file.path}, storing gzipped`); 356 373 } 374 + } else if (encoding === 'gzip' && content.length >= 2 && 375 + !(content[0] === 0x1f && content[1] === 0x8b)) { 376 + // If marked gzip but doesn't look gzipped, attempt base64 decode and retry 377 + const decoded = tryDecodeBase64(content); 378 + if (decoded && decoded.length >= 2 && decoded[0] === 0x1f && decoded[1] === 0x8b) { 379 + console.warn(`[Cache] Decoded base64+gzip fallback for ${file.path}`); 380 + try { 381 + content = gunzipSync(decoded); 382 + encoding = undefined; 383 + } catch (error) { 384 + console.error(`[Cache] Failed to decompress base64+gzip fallback for ${file.path}, storing gzipped`); 385 + content = decoded; 386 + } 
387 + } 388 + } 389 + 390 + // If encoding is missing but data looks gzipped for a text-like file, mark it 391 + if (!encoding && isTextLikeMime(file.mimeType, file.path) && content.length >= 2 && 392 + content[0] === 0x1f && content[1] === 0x8b) { 393 + encoding = 'gzip'; 357 394 } 358 395 359 396 // Build storage key ··· 370 407 // If HTML, also write rewritten version 371 408 if (isHtmlFile(file.path)) { 372 409 const basePath = `/${did}/${rkey}/`; 373 - const htmlString = new TextDecoder().decode(content); 410 + let rewriteSource = content; 411 + if (encoding === 'gzip' && content.length >= 2 && content[0] === 0x1f && content[1] === 0x8b) { 412 + try { 413 + rewriteSource = gunzipSync(content); 414 + } catch (error) { 415 + console.error(`[Cache] Failed to decompress ${file.path} for rewrite, using raw content`); 416 + } 417 + } 418 + 419 + const htmlString = new TextDecoder().decode(rewriteSource); 374 420 const rewritten = rewriteHtmlPaths(htmlString, basePath, file.path); 375 421 const rewrittenContent = new TextEncoder().encode(rewritten); 376 422 ··· 389 435 did: string, 390 436 rkey: string, 391 437 record: WispFsRecord, 392 - recordCid: string 438 + recordCid: string, 439 + options?: { 440 + forceRewriteHtml?: boolean; 441 + } 393 442 ): Promise<void> { 394 - console.log(`[Cache] Processing site ${did}/${rkey}, record CID: ${recordCid}`); 443 + const forceRewriteHtml = options?.forceRewriteHtml === true; 444 + console.log(`[Cache] Processing site ${did}/${rkey}, record CID: ${recordCid}`, { 445 + forceRewriteHtml, 446 + }); 395 447 396 448 if (!record.root?.entries) { 397 449 console.error('[Cache] Invalid record structure'); ··· 445 497 446 498 // Find new or changed files 447 499 for (const file of newFiles) { 448 - if (oldFileCids[file.path] !== file.cid) { 500 + const shouldForceRewrite = forceRewriteHtml && isHtmlFile(file.path); 501 + if (oldFileCids[file.path] !== file.cid || shouldForceRewrite) { 449 502 filesToDownload.push(file); 450 503 } 451 
504 } ··· 523 576 * Handle settings create/update event 524 577 */ 525 578 export async function handleSettingsUpdate(did: string, rkey: string, settings: WispSettings, recordCid: string): Promise<void> { 526 - console.log(`[Cache] Updating settings for ${did}`); 579 + console.log(`[Cache] Updating settings for ${did}/${rkey}`); 527 580 528 - await upsertSiteSettingsCache(did, recordCid, { 581 + await upsertSiteSettingsCache(did, rkey, recordCid, { 529 582 directoryListing: settings.directoryListing, 530 583 spaMode: settings.spaMode, 531 584 custom404: settings.custom404, ··· 538 591 /** 539 592 * Handle settings delete event 540 593 */ 541 - export async function handleSettingsDelete(did: string): Promise<void> { 542 - console.log(`[Cache] Deleting settings for ${did}`); 543 - await deleteSiteSettingsCache(did); 594 + export async function handleSettingsDelete(did: string, rkey: string): Promise<void> { 595 + console.log(`[Cache] Deleting settings for ${did}/${rkey}`); 596 + await deleteSiteSettingsCache(did, rkey); 544 597 }
+21 -11
apps/firehose-service/src/lib/db.ts
··· 1 1 import postgres from 'postgres'; 2 - import type { SiteCache, SiteSettingsCache } from '@wispplace/database'; 2 + import type { SiteCache, SiteRecord, SiteSettingsCache } from '@wispplace/database'; 3 3 import { config } from '../config'; 4 4 5 5 const sql = postgres(config.databaseUrl, { ··· 20 20 return result[0] || null; 21 21 } 22 22 23 - export async function getSiteSettingsCache(did: string): Promise<SiteSettingsCache | null> { 23 + export async function getSiteSettingsCache(did: string, rkey: string): Promise<SiteSettingsCache | null> { 24 24 const result = await sql<SiteSettingsCache[]>` 25 - SELECT did, record_cid, directory_listing, spa_mode, custom_404, index_files, clean_urls, headers, cached_at, updated_at 25 + SELECT did, rkey, record_cid, directory_listing, spa_mode, custom_404, index_files, clean_urls, headers, cached_at, updated_at 26 26 FROM site_settings_cache 27 - WHERE did = ${did} 27 + WHERE did = ${did} AND rkey = ${rkey} 28 28 LIMIT 1 29 29 `; 30 30 return result[0] || null; ··· 34 34 return await sql<SiteCache[]>` 35 35 SELECT did, rkey, record_cid, file_cids, cached_at, updated_at 36 36 FROM site_cache 37 + ORDER BY updated_at DESC 38 + `; 39 + } 40 + 41 + export async function listAllSites(): Promise<SiteRecord[]> { 42 + return await sql<SiteRecord[]>` 43 + SELECT did, rkey, display_name, created_at, updated_at 44 + FROM sites 37 45 ORDER BY updated_at DESC 38 46 `; 39 47 } ··· 72 80 73 81 export async function upsertSiteSettingsCache( 74 82 did: string, 83 + rkey: string, 75 84 recordCid: string, 76 85 settings: { 77 86 directoryListing: boolean; ··· 89 98 const cleanUrls = settings.cleanUrls ?? true; 90 99 const headersJson = settings.headers ?? 
[]; 91 100 92 - console.log(`[DB] upsertSiteSettingsCache starting for ${did}`, { 101 + console.log(`[DB] upsertSiteSettingsCache starting for ${did}/${rkey}`, { 93 102 directoryListing, 94 103 spaMode, 95 104 custom404, ··· 100 109 101 110 try { 102 111 await sql` 103 - INSERT INTO site_settings_cache (did, record_cid, directory_listing, spa_mode, custom_404, index_files, clean_urls, headers, cached_at, updated_at) 112 + INSERT INTO site_settings_cache (did, rkey, record_cid, directory_listing, spa_mode, custom_404, index_files, clean_urls, headers, cached_at, updated_at) 104 113 VALUES ( 105 114 ${did}, 115 + ${rkey}, 106 116 ${recordCid}, 107 117 ${directoryListing}, 108 118 ${spaMode}, ··· 113 123 EXTRACT(EPOCH FROM NOW()), 114 124 EXTRACT(EPOCH FROM NOW()) 115 125 ) 116 - ON CONFLICT (did) 126 + ON CONFLICT (did, rkey) 117 127 DO UPDATE SET 118 128 record_cid = EXCLUDED.record_cid, 119 129 directory_listing = EXCLUDED.directory_listing, ··· 124 134 headers = EXCLUDED.headers, 125 135 updated_at = EXTRACT(EPOCH FROM NOW()) 126 136 `; 127 - console.log(`[DB] upsertSiteSettingsCache completed for ${did}`); 137 + console.log(`[DB] upsertSiteSettingsCache completed for ${did}/${rkey}`); 128 138 } catch (err) { 129 139 const error = err instanceof Error ? err : new Error(String(err)); 130 - console.error('[DB] upsertSiteSettingsCache error:', { did, error: error.message, stack: error.stack }); 140 + console.error('[DB] upsertSiteSettingsCache error:', { did, rkey, error: error.message, stack: error.stack }); 131 141 throw error; 132 142 } 133 143 } 134 144 135 - export async function deleteSiteSettingsCache(did: string): Promise<void> { 136 - await sql`DELETE FROM site_settings_cache WHERE did = ${did}`; 145 + export async function deleteSiteSettingsCache(did: string, rkey: string): Promise<void> { 146 + await sql`DELETE FROM site_settings_cache WHERE did = ${did} AND rkey = ${rkey}`; 137 147 } 138 148 139 149 export async function closeDatabase(): Promise<void> {
+1 -1
apps/firehose-service/src/lib/firehose.ts
··· 106 106 processWithConcurrencyLimit(async () => { 107 107 try { 108 108 if (commitEvt.event === 'delete') { 109 - await handleSettingsDelete(did); 109 + await handleSettingsDelete(did, rkey); 110 110 } else if (record) { 111 111 const cidStr = cid?.toString() || ''; 112 112 await handleSettingsUpdate(did, rkey, record as WispSettings, cidStr);
+181
apps/firehose-service/src/lib/revalidate-worker.ts
··· 1 + import Redis from 'ioredis'; 2 + import os from 'os'; 3 + import { config } from '../config'; 4 + import { fetchSiteRecord, handleSiteCreateOrUpdate } from './cache-writer'; 5 + 6 + const consumerName = process.env.WISP_REVALIDATE_CONSUMER || `${os.hostname()}:${process.pid}`; 7 + const batchSize = Number.parseInt(process.env.WISP_REVALIDATE_BATCH_SIZE || '10', 10); 8 + const claimIdleMs = Number.parseInt(process.env.WISP_REVALIDATE_CLAIM_IDLE_MS || '60000', 10); 9 + const blockMs = Number.parseInt(process.env.WISP_REVALIDATE_BLOCK_MS || '5000', 10); 10 + 11 + let redis: Redis | null = null; 12 + let running = false; 13 + let loopPromise: Promise<void> | null = null; 14 + 15 + function parseFields(raw: string[]): Record<string, string> { 16 + const fields: Record<string, string> = {}; 17 + for (let i = 0; i < raw.length; i += 2) { 18 + const key = raw[i]; 19 + const value = raw[i + 1]; 20 + if (key) { 21 + fields[key] = value ?? ''; 22 + } 23 + } 24 + return fields; 25 + } 26 + 27 + async function processMessage(id: string, rawFields: string[]): Promise<void> { 28 + if (!redis) return; 29 + 30 + const fields = parseFields(rawFields); 31 + const did = fields.did; 32 + const rkey = fields.rkey; 33 + const reason = fields.reason || 'storage-miss'; 34 + 35 + if (!did || !rkey) { 36 + console.warn('[Revalidate] Missing did/rkey in message', { id, fields }); 37 + await redis.xack(config.revalidateStream, config.revalidateGroup, id); 38 + return; 39 + } 40 + 41 + console.log('[Revalidate] Processing', { did, rkey, reason, id }); 42 + 43 + const record = await fetchSiteRecord(did, rkey); 44 + if (!record) { 45 + console.warn('[Revalidate] Site record not found', { did, rkey }); 46 + await redis.xack(config.revalidateStream, config.revalidateGroup, id); 47 + return; 48 + } 49 + 50 + await handleSiteCreateOrUpdate(did, rkey, record.record, record.cid); 51 + 52 + await redis.xack(config.revalidateStream, config.revalidateGroup, id); 53 + } 54 + 55 + async function 
processMessages(messages: Array<[string, string[]]>): Promise<void> { 56 + for (const [id, rawFields] of messages) { 57 + try { 58 + await processMessage(id, rawFields); 59 + } catch (err) { 60 + const error = err instanceof Error ? err : new Error(String(err)); 61 + console.error('[Revalidate] Failed to process message', { id, error: error.message, stack: error.stack }); 62 + } 63 + } 64 + } 65 + 66 + async function ensureGroup(): Promise<void> { 67 + if (!redis) return; 68 + try { 69 + await redis.xgroup('CREATE', config.revalidateStream, config.revalidateGroup, '0', 'MKSTREAM'); 70 + } catch (err) { 71 + const error = err instanceof Error ? err : new Error(String(err)); 72 + if (!error.message.includes('BUSYGROUP')) { 73 + throw error; 74 + } 75 + } 76 + } 77 + 78 + async function claimStaleMessages(): Promise<void> { 79 + if (!redis) return; 80 + 81 + let startId = '0-0'; 82 + 83 + while (running) { 84 + const response = (await redis.xautoclaim( 85 + config.revalidateStream, 86 + config.revalidateGroup, 87 + consumerName, 88 + claimIdleMs, 89 + startId, 90 + 'COUNT', 91 + batchSize 92 + )) as unknown as [string, Array<[string, string[]]>]; 93 + 94 + const nextId = response[0]; 95 + const messages = response[1] || []; 96 + 97 + if (messages.length === 0) { 98 + break; 99 + } 100 + 101 + await processMessages(messages); 102 + 103 + if (nextId === startId) { 104 + break; 105 + } 106 + startId = nextId; 107 + } 108 + } 109 + 110 + async function readNewMessages(): Promise<void> { 111 + if (!redis) return; 112 + 113 + const response = await redis.xreadgroup( 114 + 'GROUP', 115 + config.revalidateGroup, 116 + consumerName, 117 + 'BLOCK', 118 + blockMs, 119 + 'COUNT', 120 + batchSize, 121 + 'STREAMS', 122 + config.revalidateStream, 123 + '>' 124 + ); 125 + 126 + if (!response) return; 127 + 128 + for (const [, messages] of response) { 129 + await processMessages(messages as Array<[string, string[]]>); 130 + } 131 + } 132 + 133 + async function runLoop(): Promise<void> 
{ 134 + if (!redis) return; 135 + 136 + await ensureGroup(); 137 + 138 + while (running) { 139 + try { 140 + await claimStaleMessages(); 141 + await readNewMessages(); 142 + } catch (err) { 143 + const error = err instanceof Error ? err : new Error(String(err)); 144 + console.error('[Revalidate] Loop error', { error: error.message, stack: error.stack }); 145 + await new Promise((resolve) => setTimeout(resolve, 1000)); 146 + } 147 + } 148 + } 149 + 150 + export async function startRevalidateWorker(): Promise<void> { 151 + if (!config.redisUrl) { 152 + console.warn('[Revalidate] REDIS_URL not set; revalidate worker disabled'); 153 + return; 154 + } 155 + 156 + if (running) return; 157 + 158 + redis = new Redis(config.redisUrl, { 159 + maxRetriesPerRequest: 2, 160 + enableReadyCheck: true, 161 + }); 162 + 163 + redis.on('error', (err) => { 164 + console.error('[Revalidate] Redis error:', err); 165 + }); 166 + 167 + running = true; 168 + loopPromise = runLoop(); 169 + } 170 + 171 + export async function stopRevalidateWorker(): Promise<void> { 172 + running = false; 173 + await loopPromise; 174 + loopPromise = null; 175 + 176 + if (redis) { 177 + const toClose = redis; 178 + redis = null; 179 + await toClose.quit(); 180 + } 181 + }
+1 -13
apps/hosting-service/docker-entrypoint.sh
··· 4 4 # Run different modes based on MODE environment variable 5 5 # Modes: 6 6 # - server (default): Start the hosting service 7 - # - backfill: Run cache backfill and exit 8 - # - backfill-server: Run cache backfill, then start the server 9 7 10 8 MODE="${MODE:-server}" 11 9 12 10 case "$MODE" in 13 - backfill) 14 - echo "🔄 Running in backfill-only mode..." 15 - exec npm run backfill 16 - ;; 17 - backfill-server) 18 - echo "🔄 Running backfill, then starting server..." 19 - npm run backfill 20 - echo "✅ Backfill complete, starting server..." 21 - exec npm run start 22 - ;; 23 11 server) 24 12 echo "🚀 Starting server..." 25 13 exec npm run start 26 14 ;; 27 15 *) 28 16 echo "❌ Unknown MODE: $MODE" 29 - echo "Valid modes: server, backfill, backfill-server" 17 + echo "Valid modes: server" 30 18 exit 1 31 19 ;; 32 20 esac
+4 -4
apps/hosting-service/package.json
··· 6 6 "dev": "NODE_OPTIONS='--max-old-space-size=2048' tsx --env-file=.env src/index.ts", 7 7 "build": "bun run build.ts", 8 8 "start": "NODE_OPTIONS='--max-old-space-size=2048' tsx src/index.ts", 9 - "check": "tsc --noEmit", 10 - "backfill": "NODE_OPTIONS='--max-old-space-size=2048' tsx src/index.ts --backfill" 9 + "check": "tsc --noEmit" 11 10 }, 12 11 "dependencies": { 13 12 "@atproto/api": "^0.17.4", ··· 23 22 "@wispplace/lexicons": "workspace:*", 24 23 "@wispplace/observability": "workspace:*", 25 24 "@wispplace/safe-fetch": "workspace:*", 25 + "@wispplace/tiered-storage": "workspace:*", 26 26 "hono": "^4.10.4", 27 + "ioredis": "^5.9.2", 27 28 "mime-types": "^2.1.35", 28 29 "multiformats": "^13.4.1", 29 - "postgres": "^3.4.5", 30 - "@wispplace/tiered-storage": "workspace:*" 30 + "postgres": "^3.4.5" 31 31 }, 32 32 "devDependencies": { 33 33 "@types/bun": "^1.3.1",
+7 -51
apps/hosting-service/src/index.ts
··· 1 1 import app from './server'; 2 2 import { serve } from '@hono/node-server'; 3 - import { FirehoseWorker } from './lib/firehose'; 4 - import { createLogger, initializeGrafanaExporters } from '@wispplace/observability'; 3 + import { initializeGrafanaExporters } from '@wispplace/observability'; 5 4 import { mkdirSync, existsSync } from 'fs'; 6 - import { backfillCache } from './lib/backfill'; 7 - import { startDomainCacheCleanup, stopDomainCacheCleanup, setCacheOnlyMode, closeDatabase } from './lib/db'; 5 + import { startDomainCacheCleanup, stopDomainCacheCleanup, closeDatabase } from './lib/db'; 6 + import { closeRevalidateQueue } from './lib/revalidate-queue'; 8 7 import { storage, getStorageConfig } from './lib/storage'; 9 8 10 9 // Initialize Grafana exporters if configured ··· 13 12 serviceVersion: '1.0.0' 14 13 }); 15 14 16 - const logger = createLogger('hosting-service'); 17 - 18 15 const PORT = process.env.PORT ? parseInt(process.env.PORT) : 3001; 19 16 const CACHE_DIR = process.env.CACHE_DIR || './cache/sites'; 20 - const BACKFILL_CONCURRENCY = process.env.BACKFILL_CONCURRENCY 21 - ? 
parseInt(process.env.BACKFILL_CONCURRENCY) 22 - : undefined; // Let backfill.ts default (10) apply 23 - 24 - // Parse CLI arguments 25 - const args = process.argv.slice(2); 26 - const hasBackfillFlag = args.includes('--backfill'); 27 - const backfillOnStartup = hasBackfillFlag || process.env.BACKFILL_ON_STARTUP === 'true'; 28 - 29 - // Cache-only mode: service will only cache files locally, no DB writes 30 - const hasCacheOnlyFlag = args.includes('--cache-only'); 31 - export const CACHE_ONLY_MODE = hasCacheOnlyFlag || process.env.CACHE_ONLY_MODE === 'true'; 32 - 33 - // Configure cache-only mode in database module 34 - if (CACHE_ONLY_MODE) { 35 - setCacheOnlyMode(true); 36 - } 37 17 38 18 // Ensure cache directory exists 39 19 if (!existsSync(CACHE_DIR)) { ··· 44 24 // Start domain cache cleanup 45 25 startDomainCacheCleanup(); 46 26 47 - // Start firehose worker with observability logger 48 - const firehose = new FirehoseWorker((msg, data) => { 49 - logger.info(msg, data); 50 - }); 51 - 52 - firehose.start(); 53 - 54 27 // Optional: Bootstrap hot cache from warm tier on startup 55 28 const BOOTSTRAP_HOT_ON_STARTUP = process.env.BOOTSTRAP_HOT_ON_STARTUP === 'true'; 56 29 const BOOTSTRAP_HOT_LIMIT = process.env.BOOTSTRAP_HOT_LIMIT ? 
parseInt(process.env.BOOTSTRAP_HOT_LIMIT) : 100; ··· 66 39 }); 67 40 } 68 41 69 - // Run backfill if requested 70 - if (backfillOnStartup) { 71 - console.log('🔄 Backfill requested, starting cache backfill...'); 72 - backfillCache({ 73 - skipExisting: true, 74 - concurrency: BACKFILL_CONCURRENCY, 75 - }).then((stats) => { 76 - console.log('✅ Cache backfill completed'); 77 - }).catch((err) => { 78 - console.error('❌ Cache backfill error:', err); 79 - }); 80 - } 81 - 82 42 // Add health check endpoint 83 43 app.get('/health', async (c) => { 84 - const firehoseHealth = firehose.getHealth(); 85 44 const storageStats = await storage.getStats(); 86 45 87 46 return c.json({ 88 47 status: 'ok', 89 - firehose: firehoseHealth, 90 48 storage: storageStats, 91 49 }); 92 50 }); ··· 101 59 const storageConfig = getStorageConfig(); 102 60 103 61 console.log(` 104 - Wisp Hosting Service with Tiered Storage 62 + Wisp Hosting Service (Read-Only) with Tiered Storage 105 63 106 64 Server: http://localhost:${PORT} 107 65 Health: http://localhost:${PORT}/health 108 - Cache-Only: ${CACHE_ONLY_MODE ? 'ENABLED (no DB writes)' : 'DISABLED'} 109 - Backfill: ${backfillOnStartup ? `ENABLED (concurrency: ${BACKFILL_CONCURRENCY || 10})` : 'DISABLED'} 110 66 111 67 Tiered Storage Configuration: 112 68 Hot Cache: ${storageConfig.hotCacheSize} (${storageConfig.hotCacheCount} items max) ··· 117 73 S3 Prefix: ${storageConfig.s3Prefix} 118 74 Metadata Bucket: ${storageConfig.metadataBucket} 119 75 120 - Firehose: Connecting... 
76 + Firehose: DISABLED (read-only) 121 77 `); 122 78 123 79 // Graceful shutdown 124 80 process.on('SIGINT', async () => { 125 81 console.log('\n🛑 Shutting down...'); 126 - await firehose.stop(); 127 82 stopDomainCacheCleanup(); 83 + await closeRevalidateQueue(); 128 84 await closeDatabase(); 129 85 server.close(); 130 86 process.exit(0); ··· 132 88 133 89 process.on('SIGTERM', async () => { 134 90 console.log('\n🛑 Shutting down...'); 135 - await firehose.stop(); 136 91 stopDomainCacheCleanup(); 92 + await closeRevalidateQueue(); 137 93 await closeDatabase(); 138 94 server.close(); 139 95 process.exit(0);
-159
apps/hosting-service/src/lib/backfill.ts
··· 1 - import { getAllSites } from './db'; 2 - import { fetchSiteRecord, getPdsForDid, downloadAndCacheSite, isCached } from './utils'; 3 - import { createLogger } from '@wispplace/observability'; 4 - import { markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache'; 5 - import { clearRedirectRulesCache } from './site-cache'; 6 - 7 - const logger = createLogger('hosting-service'); 8 - 9 - export interface BackfillOptions { 10 - skipExisting?: boolean; // Skip sites already in cache 11 - concurrency?: number; // Number of sites to cache concurrently 12 - maxSites?: number; // Maximum number of sites to backfill (for testing) 13 - } 14 - 15 - export interface BackfillStats { 16 - total: number; 17 - cached: number; 18 - skipped: number; 19 - failed: number; 20 - duration: number; 21 - } 22 - 23 - /** 24 - * Backfill all sites from the database into the local cache 25 - */ 26 - export async function backfillCache(options: BackfillOptions = {}): Promise<BackfillStats> { 27 - const { 28 - skipExisting = true, 29 - concurrency = 10, // Increased from 3 to 10 for better parallelization 30 - maxSites, 31 - } = options; 32 - 33 - const startTime = Date.now(); 34 - const stats: BackfillStats = { 35 - total: 0, 36 - cached: 0, 37 - skipped: 0, 38 - failed: 0, 39 - duration: 0, 40 - }; 41 - 42 - logger.info('Starting cache backfill', { skipExisting, concurrency, maxSites }); 43 - console.log(` 44 - ╔══════════════════════════════════════════╗ 45 - ║ CACHE BACKFILL STARTING ║ 46 - ╚══════════════════════════════════════════╝ 47 - `); 48 - 49 - try { 50 - // Get all sites from database 51 - let sites = await getAllSites(); 52 - stats.total = sites.length; 53 - 54 - logger.info(`Found ${sites.length} sites in database`); 55 - console.log(`📊 Found ${sites.length} sites in database`); 56 - 57 - // Limit if specified 58 - if (maxSites && maxSites > 0) { 59 - sites = sites.slice(0, maxSites); 60 - console.log(`⚙️ Limited to ${maxSites} sites for backfill`); 61 - } 62 - 63 - 
// Process sites with sliding window concurrency pool 64 - const executing = new Set<Promise<void>>(); 65 - let processed = 0; 66 - 67 - for (const site of sites) { 68 - // Create task for this site 69 - const processSite = async () => { 70 - try { 71 - // Check if already cached 72 - if (skipExisting && await isCached(site.did, site.rkey)) { 73 - stats.skipped++; 74 - processed++; 75 - logger.debug(`Skipping already cached site`, { did: site.did, rkey: site.rkey }); 76 - console.log(`⏭️ [${processed}/${sites.length}] Skipped (cached): ${site.display_name || site.rkey}`); 77 - return; 78 - } 79 - 80 - // Fetch site record 81 - const siteData = await fetchSiteRecord(site.did, site.rkey); 82 - if (!siteData) { 83 - stats.failed++; 84 - processed++; 85 - logger.error('Site record not found during backfill', null, { did: site.did, rkey: site.rkey }); 86 - console.log(`❌ [${processed}/${sites.length}] Failed (not found): ${site.display_name || site.rkey}`); 87 - return; 88 - } 89 - 90 - // Get PDS endpoint 91 - const pdsEndpoint = await getPdsForDid(site.did); 92 - if (!pdsEndpoint) { 93 - stats.failed++; 94 - processed++; 95 - logger.error('PDS not found during backfill', null, { did: site.did }); 96 - console.log(`❌ [${processed}/${sites.length}] Failed (no PDS): ${site.display_name || site.rkey}`); 97 - return; 98 - } 99 - 100 - // Mark site as being cached to prevent serving stale content during update 101 - markSiteAsBeingCached(site.did, site.rkey); 102 - 103 - try { 104 - // Download and cache site 105 - console.log(`[Backfill] Caching site from backfill - ${site.did}:${site.rkey}`); 106 - await downloadAndCacheSite(site.did, site.rkey, siteData.record, pdsEndpoint, siteData.cid); 107 - // Clear redirect rules cache since the site was updated 108 - clearRedirectRulesCache(site.did, site.rkey); 109 - stats.cached++; 110 - processed++; 111 - logger.info('Successfully cached site during backfill', { did: site.did, rkey: site.rkey }); 112 - console.log(`✅ 
[${processed}/${sites.length}] Cached (backfill): ${site.display_name || site.rkey}`); 113 - } finally { 114 - // Always unmark, even if caching fails 115 - unmarkSiteAsBeingCached(site.did, site.rkey); 116 - } 117 - } catch (err) { 118 - stats.failed++; 119 - processed++; 120 - logger.error('Failed to cache site during backfill', err, { did: site.did, rkey: site.rkey }); 121 - console.log(`❌ [${processed}/${sites.length}] Failed: ${site.display_name || site.rkey}`); 122 - } 123 - }; 124 - 125 - // Add to executing pool and remove when done 126 - const promise = processSite().finally(() => executing.delete(promise)); 127 - executing.add(promise); 128 - 129 - // When pool is full, wait for at least one to complete 130 - if (executing.size >= concurrency) { 131 - await Promise.race(executing); 132 - } 133 - } 134 - 135 - // Wait for all remaining tasks to complete 136 - await Promise.all(executing); 137 - 138 - stats.duration = Date.now() - startTime; 139 - 140 - console.log(` 141 - ╔══════════════════════════════════════════╗ 142 - ║ CACHE BACKFILL COMPLETED ║ 143 - ╚══════════════════════════════════════════╝ 144 - 145 - 📊 Total Sites: ${stats.total} 146 - ✅ Cached: ${stats.cached} 147 - ⏭️ Skipped: ${stats.skipped} 148 - ❌ Failed: ${stats.failed} 149 - ⏱️ Duration: ${(stats.duration / 1000).toFixed(2)}s 150 - `); 151 - 152 - logger.info('Cache backfill completed', stats); 153 - } catch (err) { 154 - logger.error('Cache backfill failed', err); 155 - console.error('❌ Cache backfill failed:', err); 156 - } 157 - 158 - return stats; 159 - }
+1 -54
apps/hosting-service/src/lib/cache.ts
··· 2 2 * Cache management for wisp-hosting-service 3 3 * 4 4 * With tiered storage, most caching is handled transparently. 5 - * This module tracks sites being cached and manages rewritten HTML cache. 5 + * This module provides a generic LRU cache and exposes storage stats. 6 6 */ 7 7 8 8 import { storage } from './storage'; ··· 125 125 } 126 126 } 127 127 128 - // Rewritten HTML cache: stores HTML after path rewriting for subdomain routes 129 - export const rewrittenHtmlCache = new LRUCache<Buffer>(50 * 1024 * 1024, 200); // 50MB for rewritten HTML 130 - 131 - // Helper to generate cache keys for rewritten HTML 132 - export function getCacheKey(did: string, rkey: string, filePath: string, suffix?: string): string { 133 - const base = `${did}:${rkey}:${filePath}`; 134 - return suffix ? `${base}:${suffix}` : base; 135 - } 136 - 137 - /** 138 - * Invalidate site cache via tiered storage 139 - * Also invalidates locally cached rewritten HTML 140 - */ 141 - export async function invalidateSiteCache(did: string, rkey: string): Promise<void> { 142 - // Invalidate in tiered storage 143 - const prefix = `${did}/${rkey}/`; 144 - const deleted = await storage.invalidate(prefix); 145 - 146 - // Invalidate rewritten HTML cache for this site 147 - const sitePrefix = `${did}:${rkey}:`; 148 - let htmlCount = 0; 149 - const cacheKeys = Array.from((rewrittenHtmlCache as any).cache?.keys() || []) as string[]; 150 - for (const key of cacheKeys) { 151 - if (key.startsWith(sitePrefix)) { 152 - rewrittenHtmlCache.delete(key); 153 - htmlCount++; 154 - } 155 - } 156 - 157 - console.log(`[Cache] Invalidated site ${did}:${rkey} - ${deleted} files in tiered storage, ${htmlCount} rewritten HTML`); 158 - } 159 - 160 - // Track sites currently being cached (to prevent serving stale cache during updates) 161 - const sitesBeingCached = new Set<string>(); 162 - 163 - export function markSiteAsBeingCached(did: string, rkey: string): void { 164 - const key = `${did}:${rkey}`; 165 - 
sitesBeingCached.add(key); 166 - } 167 - 168 - export function unmarkSiteAsBeingCached(did: string, rkey: string): void { 169 - const key = `${did}:${rkey}`; 170 - sitesBeingCached.delete(key); 171 - } 172 - 173 - export function isSiteBeingCached(did: string, rkey: string): boolean { 174 - const key = `${did}:${rkey}`; 175 - return sitesBeingCached.has(key); 176 - } 177 - 178 128 // Get overall cache statistics 179 129 export async function getCacheStats() { 180 130 const tieredStats = await storage.getStats(); 181 131 182 132 return { 183 133 tieredStorage: tieredStats, 184 - rewrittenHtml: rewrittenHtmlCache.getStats(), 185 - rewrittenHtmlHitRate: rewrittenHtmlCache.getHitRate(), 186 - sitesBeingCached: sitesBeingCached.size, 187 134 }; 188 135 }
+4 -37
apps/hosting-service/src/lib/db.ts
··· 2 2 import { createHash } from 'crypto'; 3 3 import type { DomainLookup, CustomDomainLookup, SiteCache, SiteSettingsCache } from '@wispplace/database'; 4 4 5 - // Global cache-only mode flag (set by index.ts) 6 - let cacheOnlyMode = false; 7 - 8 - export function setCacheOnlyMode(enabled: boolean) { 9 - cacheOnlyMode = enabled; 10 - if (enabled) { 11 - console.log('[DB] Cache-only mode enabled - database writes will be skipped'); 12 - } 13 - } 14 - 15 5 const sql = postgres( 16 6 process.env.DATABASE_URL || 'postgres://postgres:postgres@localhost:5432/wisp', 17 7 { ··· 126 116 } 127 117 128 118 export async function upsertSite(did: string, rkey: string, displayName?: string) { 129 - // Skip database writes in cache-only mode 130 - if (cacheOnlyMode) { 131 - console.log('[DB] Skipping upsertSite (cache-only mode)', { did, rkey }); 132 - return; 133 - } 134 - 135 - try { 136 - // Only set display_name if provided (not undefined/null/empty) 137 - const cleanDisplayName = displayName && displayName.trim() ? 
displayName.trim() : null; 138 - 139 - await sql` 140 - INSERT INTO sites (did, rkey, display_name, created_at, updated_at) 141 - VALUES (${did}, ${rkey}, ${cleanDisplayName}, EXTRACT(EPOCH FROM NOW()), EXTRACT(EPOCH FROM NOW())) 142 - ON CONFLICT (did, rkey) 143 - DO UPDATE SET 144 - display_name = CASE 145 - WHEN EXCLUDED.display_name IS NOT NULL THEN EXCLUDED.display_name 146 - ELSE sites.display_name 147 - END, 148 - updated_at = EXTRACT(EPOCH FROM NOW()) 149 - `; 150 - } catch (err) { 151 - console.error('Failed to upsert site', err); 152 - } 119 + console.log('[DB] Read-only mode: skipping upsertSite', { did, rkey, displayName }); 153 120 } 154 121 155 122 export interface SiteRecord { ··· 256 223 return result[0] || null; 257 224 } 258 225 259 - export async function getSiteSettingsCache(did: string): Promise<SiteSettingsCache | null> { 226 + export async function getSiteSettingsCache(did: string, rkey: string): Promise<SiteSettingsCache | null> { 260 227 const result = await sql<SiteSettingsCache[]>` 261 - SELECT did, record_cid, directory_listing, spa_mode, custom_404, index_files, clean_urls, headers, cached_at, updated_at 228 + SELECT did, rkey, record_cid, directory_listing, spa_mode, custom_404, index_files, clean_urls, headers, cached_at, updated_at 262 229 FROM site_settings_cache 263 - WHERE did = ${did} 230 + WHERE did = ${did} AND rkey = ${rkey} 264 231 LIMIT 1 265 232 `; 266 233 return result[0] || null;
+307 -436
apps/hosting-service/src/lib/file-serving.ts
··· 3 3 * Handles file retrieval, caching, redirects, and HTML rewriting 4 4 */ 5 5 6 - import { readFile } from 'fs/promises'; 7 6 import { lookup } from 'mime-types'; 7 + import { gunzipSync } from 'zlib'; 8 8 import type { Record as WispSettings } from '@wispplace/lexicons/types/place/wisp/settings'; 9 9 import { shouldCompressMimeType } from '@wispplace/atproto-utils/compression'; 10 - import { rewrittenHtmlCache, getCacheKey, isSiteBeingCached } from './cache'; 11 - import { getCachedFilePath, getCachedSettings } from './utils'; 10 + import { getCachedSettings } from './utils'; 12 11 import { loadRedirectRules, matchRedirectRule, parseCookies, parseQueryString } from './redirects'; 13 - import { rewriteHtmlPaths, isHtmlContent } from './html-rewriter'; 14 - import { generate404Page, generateDirectoryListing, siteUpdatingResponse } from './page-generators'; 15 - import { getIndexFiles, applyCustomHeaders, fileExists } from './request-utils'; 12 + import { isHtmlContent } from './html-rewriter'; 13 + import { generate404Page, generateDirectoryListing } from './page-generators'; 14 + import { getIndexFiles, applyCustomHeaders } from './request-utils'; 16 15 import { getRedirectRulesFromCache, setRedirectRulesInCache } from './site-cache'; 17 16 import { storage } from './storage'; 17 + import { getSiteCache } from './db'; 18 + import { enqueueRevalidate } from './revalidate-queue'; 19 + import { recordStorageMiss } from './revalidate-metrics'; 20 + import { normalizeFileCids } from '@wispplace/fs-utils'; 18 21 19 22 /** 20 23 * Helper to retrieve a file with metadata from tiered storage ··· 37 40 return result; 38 41 } 39 42 43 + function buildStorageKey(did: string, rkey: string, filePath: string): string { 44 + const normalized = filePath.startsWith('/') ? 
filePath.slice(1) : filePath; 45 + return `${did}/${rkey}/${normalized}`; 46 + } 47 + 48 + async function storageExists(did: string, rkey: string, filePath: string): Promise<boolean> { 49 + const key = buildStorageKey(did, rkey, filePath); 50 + return storage.exists(key); 51 + } 52 + 53 + function buildStorageMissResponse(): Response { 54 + return new Response('Storage temporarily unavailable', { 55 + status: 503, 56 + headers: { 57 + 'Cache-Control': 'no-store', 58 + 'Retry-After': '5', 59 + }, 60 + }); 61 + } 62 + 63 + async function listDirectoryEntries( 64 + did: string, 65 + rkey: string, 66 + requestPath: string 67 + ): Promise<Array<{ name: string; isDirectory: boolean }>> { 68 + const prefix = buildStorageKey(did, rkey, requestPath ? `${requestPath}/` : ''); 69 + const entries = new Map<string, boolean>(); 70 + 71 + for await (const key of storage.listKeys(prefix)) { 72 + const relative = key.slice(prefix.length); 73 + if (!relative) continue; 74 + if (relative.startsWith('.rewritten/')) continue; 75 + 76 + const [name, ...rest] = relative.split('/'); 77 + if (!name || name === '.metadata.json' || name.endsWith('.meta')) continue; 78 + 79 + const isDirectory = rest.length > 0; 80 + const existing = entries.get(name); 81 + if (existing === undefined || (isDirectory && !existing)) { 82 + entries.set(name, isDirectory); 83 + } 84 + } 85 + 86 + return Array.from(entries.entries()).map(([name, isDirectory]) => ({ name, isDirectory })); 87 + } 88 + 89 + async function getFileForRequest( 90 + did: string, 91 + rkey: string, 92 + filePath: string, 93 + preferRewrittenHtml: boolean 94 + ): Promise<{ result: Awaited<ReturnType<typeof storage.getWithMetadata>>; filePath: string } | null> { 95 + const mimeTypeGuess = lookup(filePath) || 'application/octet-stream'; 96 + if (preferRewrittenHtml && isHtmlContent(filePath, mimeTypeGuess)) { 97 + const rewrittenPath = `.rewritten/${filePath}`; 98 + const rewritten = await getFileWithMetadata(did, rkey, rewrittenPath); 99 + 
if (rewritten) { 100 + return { result: rewritten, filePath }; 101 + } 102 + } 103 + 104 + const result = await getFileWithMetadata(did, rkey, filePath); 105 + if (!result) return null; 106 + return { result, filePath }; 107 + } 108 + 109 + function buildResponseFromStorageResult( 110 + result: Awaited<ReturnType<typeof storage.getWithMetadata>>, 111 + filePath: string, 112 + settings: WispSettings | null, 113 + requestHeaders?: Record<string, string> 114 + ): Response { 115 + const content = Buffer.from(result.data); 116 + const meta = result.metadata.customMetadata as { encoding?: string; mimeType?: string } | undefined; 117 + const mimeType = meta?.mimeType || lookup(filePath) || 'application/octet-stream'; 118 + 119 + const headers: Record<string, string> = { 120 + 'Content-Type': mimeType, 121 + 'Cache-Control': mimeType.startsWith('text/html') 122 + ? 'public, max-age=300' 123 + : 'public, max-age=31536000, immutable', 124 + 'X-Cache-Tier': result.source, 125 + }; 126 + 127 + if (meta?.encoding === 'gzip') { 128 + const shouldServeCompressed = shouldCompressMimeType(mimeType); 129 + const acceptEncoding = requestHeaders?.['accept-encoding'] ?? 
''; 130 + const clientAcceptsGzip = acceptEncoding.includes('gzip'); 131 + const hasGzipMagic = content.length >= 2 && content[0] === 0x1f && content[1] === 0x8b; 132 + 133 + if (!clientAcceptsGzip || !shouldServeCompressed) { 134 + if (hasGzipMagic) { 135 + const decompressed = gunzipSync(content); 136 + applyCustomHeaders(headers, filePath, settings); 137 + return new Response(decompressed, { headers }); 138 + } 139 + console.warn(`File ${filePath} marked as gzipped but lacks magic bytes, serving as-is`); 140 + applyCustomHeaders(headers, filePath, settings); 141 + return new Response(content, { headers }); 142 + } 143 + 144 + headers['Content-Encoding'] = 'gzip'; 145 + } 146 + 147 + applyCustomHeaders(headers, filePath, settings); 148 + return new Response(content, { headers }); 149 + } 150 + 40 151 /** 41 152 * Helper to serve files from cache (for custom domains and subdomains) 42 153 */ ··· 83 194 checkPath += indexFiles[0] || 'index.html'; 84 195 } 85 196 86 - const cachedFile = getCachedFilePath(did, rkey, checkPath); 87 - const fileExistsOnDisk = await fileExists(cachedFile); 197 + const fileExistsInStorage = await storageExists(did, rkey, checkPath); 88 198 89 199 // If file exists and redirect is not forced, serve the file normally 90 - if (fileExistsOnDisk) { 91 - return serveFileInternal(did, rkey, filePath, settings); 200 + if (fileExistsInStorage) { 201 + return serveFileInternal(did, rkey, filePath, settings, headers); 92 202 } 93 203 } 94 204 ··· 97 207 // Rewrite: serve different content but keep URL the same 98 208 // Remove leading slash for internal path resolution 99 209 const rewritePath = targetPath.startsWith('/') ? 
targetPath.slice(1) : targetPath; 100 - return serveFileInternal(did, rkey, rewritePath, settings); 210 + return serveFileInternal(did, rkey, rewritePath, settings, headers); 101 211 } else if (status === 301 || status === 302) { 102 212 // External redirect: change the URL 103 213 return new Response(null, { ··· 110 220 } else if (status === 404) { 111 221 // Custom 404 page from _redirects (wins over settings.custom404) 112 222 const custom404Path = targetPath.startsWith('/') ? targetPath.slice(1) : targetPath; 113 - const response = await serveFileInternal(did, rkey, custom404Path, settings); 223 + const response = await serveFileInternal(did, rkey, custom404Path, settings, headers); 114 224 // Override status to 404 115 225 return new Response(response.body, { 116 226 status: 404, ··· 121 231 } 122 232 123 233 // No redirect matched, serve normally with settings 124 - return serveFileInternal(did, rkey, filePath, settings); 234 + return serveFileInternal(did, rkey, filePath, settings, headers); 125 235 } 126 236 127 237 /** ··· 131 241 did: string, 132 242 rkey: string, 133 243 filePath: string, 134 - settings: WispSettings | null = null 244 + settings: WispSettings | null = null, 245 + requestHeaders?: Record<string, string> 135 246 ): Promise<Response> { 136 - // Check if site is currently being cached - if so, return updating response 137 - if (isSiteBeingCached(did, rkey)) { 138 - return siteUpdatingResponse(); 139 - } 247 + let expectedFileCids: Record<string, string> | null | undefined; 248 + let expectedMissPath: string | null = null; 249 + 250 + const getExpectedFileCids = async (): Promise<Record<string, string> | null> => { 251 + if (expectedFileCids !== undefined) return expectedFileCids; 252 + const siteCache = await getSiteCache(did, rkey); 253 + if (!siteCache) { 254 + expectedFileCids = null; 255 + return null; 256 + } 257 + expectedFileCids = normalizeFileCids(siteCache.file_cids).value; 258 + return expectedFileCids; 259 + }; 260 + 261 + const 
markExpectedMiss = async (path: string) => { 262 + if (expectedMissPath) return; 263 + const fileCids = await getExpectedFileCids(); 264 + if (!fileCids) return; 265 + const normalized = path.startsWith('/') ? path.slice(1) : path; 266 + if (fileCids[normalized]) { 267 + expectedMissPath = normalized; 268 + } 269 + }; 270 + 271 + const maybeReturnStorageMiss = async (): Promise<Response | null> => { 272 + if (!expectedMissPath) return null; 273 + recordStorageMiss(expectedMissPath); 274 + await enqueueRevalidate(did, rkey, `storage-miss:${expectedMissPath}`); 275 + return buildStorageMissResponse(); 276 + }; 140 277 141 278 const indexFiles = getIndexFiles(settings); 142 279 ··· 146 283 requestPath = requestPath.slice(0, -1); 147 284 } 148 285 149 - // Check if this path is a directory first 150 - const directoryPath = getCachedFilePath(did, rkey, requestPath); 151 - if (await fileExists(directoryPath)) { 152 - const { stat, readdir } = await import('fs/promises'); 153 - try { 154 - const stats = await stat(directoryPath); 155 - if (stats.isDirectory()) { 156 - // It's a directory, try each index file in order 157 - for (const indexFile of indexFiles) { 158 - const indexPath = requestPath ? 
`${requestPath}/${indexFile}` : indexFile; 159 - const indexFilePath = getCachedFilePath(did, rkey, indexPath); 160 - if (await fileExists(indexFilePath)) { 161 - return serveFileInternal(did, rkey, indexPath, settings); 162 - } 163 - } 164 - // No index file found - check if directory listing is enabled 165 - if (settings?.directoryListing) { 166 - const { stat } = await import('fs/promises'); 167 - const entries = await readdir(directoryPath); 168 - // Filter out .meta files and other hidden files 169 - const visibleEntries = entries.filter(entry => !entry.endsWith('.meta') && entry !== '.metadata.json'); 170 - 171 - // Check which entries are directories 172 - const entriesWithType = await Promise.all( 173 - visibleEntries.map(async (name) => { 174 - try { 175 - const entryPath = `${directoryPath}/${name}`; 176 - const stats = await stat(entryPath); 177 - return { name, isDirectory: stats.isDirectory() }; 178 - } catch { 179 - return { name, isDirectory: false }; 180 - } 181 - }) 182 - ); 183 - 184 - const html = generateDirectoryListing(requestPath, entriesWithType); 185 - return new Response(html, { 186 - headers: { 187 - 'Content-Type': 'text/html; charset=utf-8', 188 - 'Cache-Control': 'public, max-age=300', 189 - }, 190 - }); 191 - } 192 - // Fall through to 404/SPA handling 286 + // Check if this path is a directory first (best-effort via prefix scan) 287 + const directoryEntries = await listDirectoryEntries(did, rkey, requestPath); 288 + if (directoryEntries.length > 0) { 289 + // It's a directory, try each index file in order 290 + for (const indexFile of indexFiles) { 291 + const indexPath = requestPath ? 
`${requestPath}/${indexFile}` : indexFile; 292 + if (await storageExists(did, rkey, indexPath)) { 293 + return serveFileInternal(did, rkey, indexPath, settings, requestHeaders); 193 294 } 194 - } catch (err) { 195 - // If stat fails, continue with normal flow 295 + await markExpectedMiss(indexPath); 296 + } 297 + // No index file found - check if directory listing is enabled 298 + if (settings?.directoryListing) { 299 + const missResponse = await maybeReturnStorageMiss(); 300 + if (missResponse) return missResponse; 301 + const html = generateDirectoryListing(requestPath, directoryEntries); 302 + return new Response(html, { 303 + headers: { 304 + 'Content-Type': 'text/html; charset=utf-8', 305 + 'Cache-Control': 'public, max-age=300', 306 + }, 307 + }); 196 308 } 309 + // Fall through to 404/SPA handling 197 310 } 198 311 199 312 // Not a directory, try to serve as a file ··· 203 316 const result = await getFileWithMetadata(did, rkey, fileRequestPath); 204 317 205 318 if (result) { 206 - const content = Buffer.from(result.data); 207 - const meta = result.metadata.customMetadata as { encoding?: string; mimeType?: string } | undefined; 208 - 209 - // Build headers with caching 210 - const headers: Record<string, string> = { 211 - 'X-Cache-Tier': result.source, 212 - }; 213 - 214 - if (meta?.encoding === 'gzip' && meta.mimeType) { 215 - const shouldServeCompressed = shouldCompressMimeType(meta.mimeType); 216 - 217 - if (!shouldServeCompressed) { 218 - // Verify content is actually gzipped before attempting decompression 219 - const isGzipped = content.length >= 2 && content[0] === 0x1f && content[1] === 0x8b; 220 - if (isGzipped) { 221 - const { gunzipSync } = await import('zlib'); 222 - const decompressed = gunzipSync(content); 223 - headers['Content-Type'] = meta.mimeType; 224 - headers['Cache-Control'] = 'public, max-age=31536000, immutable'; 225 - applyCustomHeaders(headers, fileRequestPath, settings); 226 - return new Response(decompressed, { headers }); 227 - } 
else { 228 - // Meta says gzipped but content isn't - serve as-is 229 - console.warn(`File ${filePath} has gzip encoding in meta but content lacks gzip magic bytes`); 230 - headers['Content-Type'] = meta.mimeType; 231 - headers['Cache-Control'] = 'public, max-age=31536000, immutable'; 232 - applyCustomHeaders(headers, fileRequestPath, settings); 233 - return new Response(content, { headers }); 234 - } 235 - } 236 - 237 - headers['Content-Type'] = meta.mimeType; 238 - headers['Content-Encoding'] = 'gzip'; 239 - headers['Cache-Control'] = meta.mimeType.startsWith('text/html') 240 - ? 'public, max-age=300' 241 - : 'public, max-age=31536000, immutable'; 242 - applyCustomHeaders(headers, fileRequestPath, settings); 243 - return new Response(content, { headers }); 244 - } 245 - 246 - // Non-compressed files 247 - const mimeType = meta?.mimeType || lookup(fileRequestPath) || 'application/octet-stream'; 248 - headers['Content-Type'] = mimeType; 249 - headers['Cache-Control'] = mimeType.startsWith('text/html') 250 - ? 
'public, max-age=300' 251 - : 'public, max-age=31536000, immutable'; 252 - applyCustomHeaders(headers, fileRequestPath, settings); 253 - return new Response(content, { headers }); 319 + return buildResponseFromStorageResult(result, fileRequestPath, settings, requestHeaders); 254 320 } 321 + await markExpectedMiss(fileRequestPath); 255 322 256 323 // Try index files for directory-like paths 257 324 if (!fileRequestPath.includes('.')) { ··· 277 344 applyCustomHeaders(headers, indexPath, settings); 278 345 return new Response(indexContent, { headers }); 279 346 } 347 + await markExpectedMiss(indexPath); 280 348 } 281 349 } 282 350 283 351 // Try clean URLs: /about -> /about.html 284 352 if (settings?.cleanUrls && !fileRequestPath.includes('.')) { 285 353 const htmlPath = `${fileRequestPath}.html`; 286 - const htmlFile = getCachedFilePath(did, rkey, htmlPath); 287 - if (await fileExists(htmlFile)) { 288 - return serveFileInternal(did, rkey, htmlPath, settings); 354 + if (await storageExists(did, rkey, htmlPath)) { 355 + return serveFileInternal(did, rkey, htmlPath, settings, requestHeaders); 289 356 } 357 + await markExpectedMiss(htmlPath); 290 358 291 359 // Also try /about/index.html 292 360 for (const indexFileName of indexFiles) { 293 361 const indexPath = fileRequestPath ? 
`${fileRequestPath}/${indexFileName}` : indexFileName; 294 - const indexFile = getCachedFilePath(did, rkey, indexPath); 295 - if (await fileExists(indexFile)) { 296 - return serveFileInternal(did, rkey, indexPath, settings); 362 + if (await storageExists(did, rkey, indexPath)) { 363 + return serveFileInternal(did, rkey, indexPath, settings, requestHeaders); 297 364 } 365 + await markExpectedMiss(indexPath); 298 366 } 299 367 } 300 368 301 369 // SPA mode: serve SPA file for all non-existing routes (wins over custom404 but loses to _redirects) 302 370 if (settings?.spaMode) { 303 371 const spaFile = settings.spaMode; 304 - const spaFilePath = getCachedFilePath(did, rkey, spaFile); 305 - if (await fileExists(spaFilePath)) { 306 - return serveFileInternal(did, rkey, spaFile, settings); 372 + if (await storageExists(did, rkey, spaFile)) { 373 + return serveFileInternal(did, rkey, spaFile, settings, requestHeaders); 307 374 } 375 + await markExpectedMiss(spaFile); 308 376 } 309 377 310 378 // Custom 404: serve custom 404 file if configured (wins conflict battle) 311 379 if (settings?.custom404) { 312 380 const custom404File = settings.custom404; 313 - const custom404Path = getCachedFilePath(did, rkey, custom404File); 314 - if (await fileExists(custom404Path)) { 315 - const response: Response = await serveFileInternal(did, rkey, custom404File, settings); 381 + if (await storageExists(did, rkey, custom404File)) { 382 + const response: Response = await serveFileInternal(did, rkey, custom404File, settings, requestHeaders); 316 383 // Override status to 404 317 384 return new Response(response.body, { 318 385 status: 404, 319 386 headers: response.headers, 320 387 }); 321 388 } 389 + await markExpectedMiss(custom404File); 322 390 } 323 391 324 392 // Autodetect 404 pages (GitHub Pages: 404.html, Neocities/Nekoweb: not_found.html) 325 393 const auto404Pages = ['404.html', 'not_found.html']; 326 394 for (const auto404Page of auto404Pages) { 327 - const auto404Path = 
getCachedFilePath(did, rkey, auto404Page); 328 - if (await fileExists(auto404Path)) { 329 - const response: Response = await serveFileInternal(did, rkey, auto404Page, settings); 395 + if (await storageExists(did, rkey, auto404Page)) { 396 + const response: Response = await serveFileInternal(did, rkey, auto404Page, settings, requestHeaders); 330 397 // Override status to 404 331 398 return new Response(response.body, { 332 399 status: 404, 333 400 headers: response.headers, 334 401 }); 335 402 } 403 + await markExpectedMiss(auto404Page); 336 404 } 337 405 338 406 // Directory listing fallback: if enabled, show root directory listing on 404 339 407 if (settings?.directoryListing) { 340 - const rootPath = getCachedFilePath(did, rkey, ''); 341 - if (await fileExists(rootPath)) { 342 - const { stat, readdir } = await import('fs/promises'); 343 - try { 344 - const stats = await stat(rootPath); 345 - if (stats.isDirectory()) { 346 - const entries = await readdir(rootPath); 347 - // Filter out .meta files and metadata 348 - const visibleEntries = entries.filter(entry => 349 - !entry.endsWith('.meta') && entry !== '.metadata.json' 350 - ); 351 - 352 - // Check which entries are directories 353 - const entriesWithType = await Promise.all( 354 - visibleEntries.map(async (name) => { 355 - try { 356 - const entryPath = `${rootPath}/${name}`; 357 - const entryStats = await stat(entryPath); 358 - return { name, isDirectory: entryStats.isDirectory() }; 359 - } catch { 360 - return { name, isDirectory: false }; 361 - } 362 - }) 363 - ); 364 - 365 - const html = generateDirectoryListing('', entriesWithType); 366 - return new Response(html, { 367 - status: 404, 368 - headers: { 369 - 'Content-Type': 'text/html; charset=utf-8', 370 - 'Cache-Control': 'public, max-age=300', 371 - }, 372 - }); 373 - } 374 - } catch (err) { 375 - // If directory listing fails, fall through to 404 376 - } 408 + const rootEntries = await listDirectoryEntries(did, rkey, ''); 409 + if (rootEntries.length > 
0) { 410 + const missResponse = await maybeReturnStorageMiss(); 411 + if (missResponse) return missResponse; 412 + const html = generateDirectoryListing('', rootEntries); 413 + return new Response(html, { 414 + status: 404, 415 + headers: { 416 + 'Content-Type': 'text/html; charset=utf-8', 417 + 'Cache-Control': 'public, max-age=300', 418 + }, 419 + }); 377 420 } 378 421 } 379 422 423 + const missResponse = await maybeReturnStorageMiss(); 424 + if (missResponse) return missResponse; 425 + 380 426 // Default styled 404 page 381 427 const html = generate404Page(); 382 428 return new Response(html, { ··· 435 481 checkPath += indexFiles[0] || 'index.html'; 436 482 } 437 483 438 - const cachedFile = getCachedFilePath(did, rkey, checkPath); 439 - const fileExistsOnDisk = await fileExists(cachedFile); 484 + const fileExistsInStorage = await storageExists(did, rkey, checkPath); 440 485 441 486 // If file exists and redirect is not forced, serve the file normally 442 - if (fileExistsOnDisk) { 443 - return serveFileInternalWithRewrite(did, rkey, filePath, basePath, settings); 487 + if (fileExistsInStorage) { 488 + return serveFileInternalWithRewrite(did, rkey, filePath, basePath, settings, headers); 444 489 } 445 490 } 446 491 ··· 448 493 if (status === 200) { 449 494 // Rewrite: serve different content but keep URL the same 450 495 const rewritePath = targetPath.startsWith('/') ? targetPath.slice(1) : targetPath; 451 - return serveFileInternalWithRewrite(did, rkey, rewritePath, basePath, settings); 496 + return serveFileInternalWithRewrite(did, rkey, rewritePath, basePath, settings, headers); 452 497 } else if (status === 301 || status === 302) { 453 498 // External redirect: change the URL 454 499 // For sites.wisp.place, we need to adjust the target path to include the base path ··· 467 512 } else if (status === 404) { 468 513 // Custom 404 page from _redirects (wins over settings.custom404) 469 514 const custom404Path = targetPath.startsWith('/') ? 
targetPath.slice(1) : targetPath; 470 - const response = await serveFileInternalWithRewrite(did, rkey, custom404Path, basePath, settings); 515 + const response = await serveFileInternalWithRewrite(did, rkey, custom404Path, basePath, settings, headers); 471 516 // Override status to 404 472 517 return new Response(response.body, { 473 518 status: 404, ··· 478 523 } 479 524 480 525 // No redirect matched, serve normally with settings 481 - return serveFileInternalWithRewrite(did, rkey, filePath, basePath, settings); 526 + return serveFileInternalWithRewrite(did, rkey, filePath, basePath, settings, headers); 482 527 } 483 528 484 529 /** ··· 489 534 rkey: string, 490 535 filePath: string, 491 536 basePath: string, 492 - settings: WispSettings | null = null 537 + settings: WispSettings | null = null, 538 + requestHeaders?: Record<string, string> 493 539 ): Promise<Response> { 494 - // Check if site is currently being cached - if so, return updating response 495 - if (isSiteBeingCached(did, rkey)) { 496 - return siteUpdatingResponse(); 497 - } 540 + let expectedFileCids: Record<string, string> | null | undefined; 541 + let expectedMissPath: string | null = null; 542 + 543 + const getExpectedFileCids = async (): Promise<Record<string, string> | null> => { 544 + if (expectedFileCids !== undefined) return expectedFileCids; 545 + const siteCache = await getSiteCache(did, rkey); 546 + if (!siteCache) { 547 + expectedFileCids = null; 548 + return null; 549 + } 550 + expectedFileCids = normalizeFileCids(siteCache.file_cids).value; 551 + return expectedFileCids; 552 + }; 553 + 554 + const markExpectedMiss = async (path: string) => { 555 + if (expectedMissPath) return; 556 + const fileCids = await getExpectedFileCids(); 557 + if (!fileCids) return; 558 + const normalized = path.startsWith('/') ? 
path.slice(1) : path; 559 + if (fileCids[normalized]) { 560 + expectedMissPath = normalized; 561 + } 562 + }; 563 + 564 + const maybeReturnStorageMiss = async (): Promise<Response | null> => { 565 + if (!expectedMissPath) return null; 566 + recordStorageMiss(expectedMissPath); 567 + await enqueueRevalidate(did, rkey, `storage-miss:${expectedMissPath}`); 568 + return buildStorageMissResponse(); 569 + }; 498 570 499 571 const indexFiles = getIndexFiles(settings); 500 572 ··· 504 576 requestPath = requestPath.slice(0, -1); 505 577 } 506 578 507 - // Check if this path is a directory first 508 - const directoryPath = getCachedFilePath(did, rkey, requestPath); 509 - if (await fileExists(directoryPath)) { 510 - const { stat, readdir } = await import('fs/promises'); 511 - try { 512 - const stats = await stat(directoryPath); 513 - if (stats.isDirectory()) { 514 - // It's a directory, try each index file in order 515 - for (const indexFile of indexFiles) { 516 - const indexPath = requestPath ? `${requestPath}/${indexFile}` : indexFile; 517 - const indexFilePath = getCachedFilePath(did, rkey, indexPath); 518 - if (await fileExists(indexFilePath)) { 519 - return serveFileInternalWithRewrite(did, rkey, indexPath, basePath, settings); 520 - } 521 - } 522 - // No index file found - check if directory listing is enabled 523 - if (settings?.directoryListing) { 524 - const { stat } = await import('fs/promises'); 525 - const entries = await readdir(directoryPath); 526 - // Filter out .meta files and other hidden files 527 - const visibleEntries = entries.filter(entry => !entry.endsWith('.meta') && entry !== '.metadata.json'); 528 - 529 - // Check which entries are directories 530 - const entriesWithType = await Promise.all( 531 - visibleEntries.map(async (name) => { 532 - try { 533 - const entryPath = `${directoryPath}/${name}`; 534 - const stats = await stat(entryPath); 535 - return { name, isDirectory: stats.isDirectory() }; 536 - } catch { 537 - return { name, isDirectory: false 
}; 538 - } 539 - }) 540 - ); 541 - 542 - const html = generateDirectoryListing(requestPath, entriesWithType); 543 - return new Response(html, { 544 - headers: { 545 - 'Content-Type': 'text/html; charset=utf-8', 546 - 'Cache-Control': 'public, max-age=300', 547 - }, 548 - }); 549 - } 550 - // Fall through to 404/SPA handling 579 + // Check if this path is a directory first (best-effort via prefix scan) 580 + const directoryEntries = await listDirectoryEntries(did, rkey, requestPath); 581 + if (directoryEntries.length > 0) { 582 + // It's a directory, try each index file in order 583 + for (const indexFile of indexFiles) { 584 + const indexPath = requestPath ? `${requestPath}/${indexFile}` : indexFile; 585 + if (await storageExists(did, rkey, indexPath)) { 586 + return serveFileInternalWithRewrite(did, rkey, indexPath, basePath, settings, requestHeaders); 551 587 } 552 - } catch (err) { 553 - // If stat fails, continue with normal flow 588 + await markExpectedMiss(indexPath); 589 + } 590 + // No index file found - check if directory listing is enabled 591 + if (settings?.directoryListing) { 592 + const missResponse = await maybeReturnStorageMiss(); 593 + if (missResponse) return missResponse; 594 + const html = generateDirectoryListing(requestPath, directoryEntries); 595 + return new Response(html, { 596 + headers: { 597 + 'Content-Type': 'text/html; charset=utf-8', 598 + 'Cache-Control': 'public, max-age=300', 599 + }, 600 + }); 554 601 } 602 + // Fall through to 404/SPA handling 555 603 } 556 604 557 605 // Not a directory, try to serve as a file 558 606 const fileRequestPath: string = requestPath || indexFiles[0] || 'index.html'; 559 607 560 - // Check for rewritten HTML in cache first (if it's HTML) 561 - const mimeTypeGuess = lookup(fileRequestPath) || 'application/octet-stream'; 562 - if (isHtmlContent(fileRequestPath, mimeTypeGuess)) { 563 - const rewrittenKey = getCacheKey(did, rkey, fileRequestPath, `rewritten:${basePath}`); 564 - const rewrittenContent = 
rewrittenHtmlCache.get(rewrittenKey); 565 - if (rewrittenContent) { 566 - console.log(`[HTML Rewrite] Serving from rewritten cache: ${rewrittenKey}`); 567 - const headers: Record<string, string> = { 568 - 'Content-Type': 'text/html; charset=utf-8', 569 - 'Content-Encoding': 'gzip', 570 - 'Cache-Control': 'public, max-age=300', 571 - 'X-Cache-Tier': 'local', // Rewritten HTML is stored locally 572 - }; 573 - applyCustomHeaders(headers, fileRequestPath, settings); 574 - return new Response(rewrittenContent, { headers }); 575 - } 608 + const fileResult = await getFileForRequest(did, rkey, fileRequestPath, true); 609 + if (fileResult) { 610 + return buildResponseFromStorageResult(fileResult.result, fileRequestPath, settings, requestHeaders); 576 611 } 577 - 578 - // Retrieve from tiered storage 579 - const result = await getFileWithMetadata(did, rkey, fileRequestPath); 580 - 581 - if (result) { 582 - const content = Buffer.from(result.data); 583 - const meta = result.metadata.customMetadata as { encoding?: string; mimeType?: string } | undefined; 584 - const mimeType = meta?.mimeType || lookup(fileRequestPath) || 'application/octet-stream'; 585 - const isGzipped = meta?.encoding === 'gzip'; 586 - 587 - console.log(`[File Serve] Serving ${fileRequestPath}, mimeType: ${mimeType}, isHTML: ${isHtmlContent(fileRequestPath, mimeType)}, basePath: ${basePath}`); 588 - 589 - // Check if this is HTML content that needs rewriting 590 - if (isHtmlContent(fileRequestPath, mimeType)) { 591 - console.log(`[HTML Rewrite] Processing ${fileRequestPath}, basePath: ${basePath}, mimeType: ${mimeType}, isGzipped: ${isGzipped}`); 592 - let htmlContent: string; 593 - if (isGzipped) { 594 - // Verify content is actually gzipped 595 - const hasGzipMagic = content.length >= 2 && content[0] === 0x1f && content[1] === 0x8b; 596 - if (hasGzipMagic) { 597 - const { gunzipSync } = await import('zlib'); 598 - htmlContent = gunzipSync(content).toString('utf-8'); 599 - } else { 600 - console.warn(`File 
${fileRequestPath} marked as gzipped but lacks magic bytes, serving as-is`); 601 - htmlContent = content.toString('utf-8'); 602 - } 603 - } else { 604 - htmlContent = content.toString('utf-8'); 605 - } 606 - // Check for <base> tag which can override paths 607 - const baseTagMatch = htmlContent.match(/<base\s+[^>]*href=["'][^"']+["'][^>]*>/i); 608 - if (baseTagMatch) { 609 - console.warn(`[HTML Rewrite] WARNING: <base> tag found: ${baseTagMatch[0]} - this may override path rewrites`); 610 - } 611 - 612 - // Find src/href attributes (quoted and unquoted) to debug 613 - const allMatches = htmlContent.match(/(?:src|href)\s*=\s*["']?\/[^"'\s>]+/g); 614 - console.log(`[HTML Rewrite] Found ${allMatches ? allMatches.length : 0} local path attrs`); 615 - if (allMatches && allMatches.length > 0) { 616 - console.log(`[HTML Rewrite] Sample paths: ${allMatches.slice(0, 5).join(', ')}`); 617 - } 618 - 619 - const rewritten = rewriteHtmlPaths(htmlContent, basePath, fileRequestPath); 620 - 621 - const rewrittenMatches = rewritten.match(/(?:src|href)\s*=\s*["']?\/[^"'\s>]+/g); 622 - console.log(`[HTML Rewrite] After rewrite, found ${rewrittenMatches ? 
rewrittenMatches.length : 0} local paths`); 623 - if (rewrittenMatches && rewrittenMatches.length > 0) { 624 - console.log(`[HTML Rewrite] Sample rewritten: ${rewrittenMatches.slice(0, 5).join(', ')}`); 625 - } 626 - 627 - // Recompress and cache the rewritten HTML 628 - const { gzipSync } = await import('zlib'); 629 - const recompressed = gzipSync(Buffer.from(rewritten, 'utf-8')); 630 - 631 - const rewrittenKey = getCacheKey(did, rkey, fileRequestPath, `rewritten:${basePath}`); 632 - rewrittenHtmlCache.set(rewrittenKey, recompressed, recompressed.length); 633 - 634 - const htmlHeaders: Record<string, string> = { 635 - 'Content-Type': 'text/html; charset=utf-8', 636 - 'Content-Encoding': 'gzip', 637 - 'Cache-Control': 'public, max-age=300', 638 - 'X-Cache-Tier': result.source, 639 - }; 640 - applyCustomHeaders(htmlHeaders, fileRequestPath, settings); 641 - return new Response(recompressed, { headers: htmlHeaders }); 642 - } 643 - 644 - // Non-HTML files: serve as-is 645 - const headers: Record<string, string> = { 646 - 'Content-Type': mimeType, 647 - 'Cache-Control': 'public, max-age=31536000, immutable', 648 - 'X-Cache-Tier': result.source, 649 - }; 650 - 651 - if (isGzipped) { 652 - const shouldServeCompressed = shouldCompressMimeType(mimeType); 653 - if (!shouldServeCompressed) { 654 - // Verify content is actually gzipped 655 - const hasGzipMagic = content.length >= 2 && content[0] === 0x1f && content[1] === 0x8b; 656 - if (hasGzipMagic) { 657 - const { gunzipSync } = await import('zlib'); 658 - const decompressed = gunzipSync(content); 659 - applyCustomHeaders(headers, fileRequestPath, settings); 660 - return new Response(decompressed, { headers }); 661 - } else { 662 - console.warn(`File ${fileRequestPath} marked as gzipped but lacks magic bytes, serving as-is`); 663 - applyCustomHeaders(headers, fileRequestPath, settings); 664 - return new Response(content, { headers }); 665 - } 666 - } 667 - headers['Content-Encoding'] = 'gzip'; 668 - } 669 - 670 - 
applyCustomHeaders(headers, fileRequestPath, settings); 671 - return new Response(content, { headers }); 672 - } 612 + await markExpectedMiss(fileRequestPath); 673 613 674 614 // Try index files for directory-like paths 675 615 if (!fileRequestPath.includes('.')) { 676 616 for (const indexFileName of indexFiles) { 677 617 const indexPath = fileRequestPath ? `${fileRequestPath}/${indexFileName}` : indexFileName; 678 - 679 - // Check for rewritten index file in cache 680 - const rewrittenKey = getCacheKey(did, rkey, indexPath, `rewritten:${basePath}`); 681 - const rewrittenContent = rewrittenHtmlCache.get(rewrittenKey); 682 - if (rewrittenContent) { 683 - const headers: Record<string, string> = { 684 - 'Content-Type': 'text/html; charset=utf-8', 685 - 'Content-Encoding': 'gzip', 686 - 'Cache-Control': 'public, max-age=300', 687 - 'X-Cache-Tier': 'local', // Rewritten HTML is stored locally 688 - }; 689 - applyCustomHeaders(headers, indexPath, settings); 690 - return new Response(rewrittenContent, { headers }); 691 - } 692 - 693 - const indexResult = await getFileWithMetadata(did, rkey, indexPath); 694 - 618 + const indexResult = await getFileForRequest(did, rkey, indexPath, true); 695 619 if (indexResult) { 696 - const indexContent = Buffer.from(indexResult.data); 697 - const indexMeta = indexResult.metadata.customMetadata as { encoding?: string; mimeType?: string } | undefined; 698 - const isGzipped = indexMeta?.encoding === 'gzip'; 699 - 700 - let htmlContent: string; 701 - if (isGzipped) { 702 - // Verify content is actually gzipped 703 - const hasGzipMagic = indexContent.length >= 2 && indexContent[0] === 0x1f && indexContent[1] === 0x8b; 704 - if (hasGzipMagic) { 705 - const { gunzipSync } = await import('zlib'); 706 - htmlContent = gunzipSync(indexContent).toString('utf-8'); 707 - } else { 708 - console.warn(`Index file marked as gzipped but lacks magic bytes, serving as-is`); 709 - htmlContent = indexContent.toString('utf-8'); 710 - } 711 - } else { 712 - 
htmlContent = indexContent.toString('utf-8'); 713 - } 714 - const rewritten = rewriteHtmlPaths(htmlContent, basePath, indexPath); 715 - 716 - const { gzipSync } = await import('zlib'); 717 - const recompressed = gzipSync(Buffer.from(rewritten, 'utf-8')); 718 - 719 - rewrittenHtmlCache.set(rewrittenKey, recompressed, recompressed.length); 720 - 721 - const headers: Record<string, string> = { 722 - 'Content-Type': 'text/html; charset=utf-8', 723 - 'Content-Encoding': 'gzip', 724 - 'Cache-Control': 'public, max-age=300', 725 - 'X-Cache-Tier': indexResult.source, 726 - }; 727 - applyCustomHeaders(headers, indexPath, settings); 728 - return new Response(recompressed, { headers }); 620 + return buildResponseFromStorageResult(indexResult.result, indexPath, settings, requestHeaders); 729 621 } 622 + await markExpectedMiss(indexPath); 730 623 } 731 624 } 732 625 733 626 // Try clean URLs: /about -> /about.html 734 627 if (settings?.cleanUrls && !fileRequestPath.includes('.')) { 735 628 const htmlPath = `${fileRequestPath}.html`; 736 - const htmlFile = getCachedFilePath(did, rkey, htmlPath); 737 - if (await fileExists(htmlFile)) { 738 - return serveFileInternalWithRewrite(did, rkey, htmlPath, basePath, settings); 629 + if (await storageExists(did, rkey, htmlPath)) { 630 + return serveFileInternalWithRewrite(did, rkey, htmlPath, basePath, settings, requestHeaders); 739 631 } 632 + await markExpectedMiss(htmlPath); 740 633 741 634 // Also try /about/index.html 742 635 for (const indexFileName of indexFiles) { 743 636 const indexPath = fileRequestPath ? 
`${fileRequestPath}/${indexFileName}` : indexFileName; 744 - const indexFile = getCachedFilePath(did, rkey, indexPath); 745 - if (await fileExists(indexFile)) { 746 - return serveFileInternalWithRewrite(did, rkey, indexPath, basePath, settings); 637 + if (await storageExists(did, rkey, indexPath)) { 638 + return serveFileInternalWithRewrite(did, rkey, indexPath, basePath, settings, requestHeaders); 747 639 } 640 + await markExpectedMiss(indexPath); 748 641 } 749 642 } 750 643 751 644 // SPA mode: serve SPA file for all non-existing routes 752 645 if (settings?.spaMode) { 753 646 const spaFile = settings.spaMode; 754 - const spaFilePath = getCachedFilePath(did, rkey, spaFile); 755 - if (await fileExists(spaFilePath)) { 756 - return serveFileInternalWithRewrite(did, rkey, spaFile, basePath, settings); 647 + if (await storageExists(did, rkey, spaFile)) { 648 + return serveFileInternalWithRewrite(did, rkey, spaFile, basePath, settings, requestHeaders); 757 649 } 650 + await markExpectedMiss(spaFile); 758 651 } 759 652 760 653 // Custom 404: serve custom 404 file if configured (wins conflict battle) 761 654 if (settings?.custom404) { 762 655 const custom404File = settings.custom404; 763 - const custom404Path = getCachedFilePath(did, rkey, custom404File); 764 - if (await fileExists(custom404Path)) { 765 - const response: Response = await serveFileInternalWithRewrite(did, rkey, custom404File, basePath, settings); 656 + if (await storageExists(did, rkey, custom404File)) { 657 + const response: Response = await serveFileInternalWithRewrite(did, rkey, custom404File, basePath, settings, requestHeaders); 766 658 // Override status to 404 767 659 return new Response(response.body, { 768 660 status: 404, 769 661 headers: response.headers, 770 662 }); 771 663 } 664 + await markExpectedMiss(custom404File); 772 665 } 773 666 774 667 // Autodetect 404 pages (GitHub Pages: 404.html, Neocities/Nekoweb: not_found.html) 775 668 const auto404Pages = ['404.html', 'not_found.html']; 776 
669 for (const auto404Page of auto404Pages) { 777 - const auto404Path = getCachedFilePath(did, rkey, auto404Page); 778 - if (await fileExists(auto404Path)) { 779 - const response: Response = await serveFileInternalWithRewrite(did, rkey, auto404Page, basePath, settings); 670 + if (await storageExists(did, rkey, auto404Page)) { 671 + const response: Response = await serveFileInternalWithRewrite(did, rkey, auto404Page, basePath, settings, requestHeaders); 780 672 // Override status to 404 781 673 return new Response(response.body, { 782 674 status: 404, 783 675 headers: response.headers, 784 676 }); 785 677 } 678 + await markExpectedMiss(auto404Page); 786 679 } 787 680 788 681 // Directory listing fallback: if enabled, show root directory listing on 404 789 682 if (settings?.directoryListing) { 790 - const rootPath = getCachedFilePath(did, rkey, ''); 791 - if (await fileExists(rootPath)) { 792 - const { stat, readdir } = await import('fs/promises'); 793 - try { 794 - const stats = await stat(rootPath); 795 - if (stats.isDirectory()) { 796 - const entries = await readdir(rootPath); 797 - // Filter out .meta files and metadata 798 - const visibleEntries = entries.filter(entry => 799 - !entry.endsWith('.meta') && entry !== '.metadata.json' 800 - ); 801 - 802 - // Check which entries are directories 803 - const entriesWithType = await Promise.all( 804 - visibleEntries.map(async (name) => { 805 - try { 806 - const entryPath = `${rootPath}/${name}`; 807 - const entryStats = await stat(entryPath); 808 - return { name, isDirectory: entryStats.isDirectory() }; 809 - } catch { 810 - return { name, isDirectory: false }; 811 - } 812 - }) 813 - ); 814 - 815 - const html = generateDirectoryListing('', entriesWithType); 816 - return new Response(html, { 817 - status: 404, 818 - headers: { 819 - 'Content-Type': 'text/html; charset=utf-8', 820 - 'Cache-Control': 'public, max-age=300', 821 - }, 822 - }); 823 - } 824 - } catch (err) { 825 - // If directory listing fails, fall through to 
404 826 - } 683 + const rootEntries = await listDirectoryEntries(did, rkey, ''); 684 + if (rootEntries.length > 0) { 685 + const missResponse = await maybeReturnStorageMiss(); 686 + if (missResponse) return missResponse; 687 + const html = generateDirectoryListing('', rootEntries); 688 + return new Response(html, { 689 + status: 404, 690 + headers: { 691 + 'Content-Type': 'text/html; charset=utf-8', 692 + 'Cache-Control': 'public, max-age=300', 693 + }, 694 + }); 827 695 } 828 696 } 697 + 698 + const missResponse = await maybeReturnStorageMiss(); 699 + if (missResponse) return missResponse; 829 700 830 701 // Default styled 404 page 831 702 const html = generate404Page();
-512
apps/hosting-service/src/lib/firehose.ts
··· 1 - import { existsSync } from 'fs' 2 - import { 3 - getPdsForDid, 4 - downloadAndCacheSite, 5 - fetchSiteRecord 6 - } from './utils' 7 - import { upsertSite, tryAcquireLock, releaseLock } from './db' 8 - import { safeFetch } from '@wispplace/safe-fetch' 9 - // import { isRecord, validateRecord } from '@wispplace/lexicons/types/place/wisp/fs' 10 - import { isRecord } from '@wispplace/lexicons/types/place/wisp/fs' 11 - import { Firehose } from '@atproto/sync' 12 - import { IdResolver } from '@atproto/identity' 13 - import { invalidateSiteCache, markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache' 14 - import { clearRedirectRulesCache } from './site-cache' 15 - 16 - const CACHE_DIR = process.env.CACHE_DIR || './cache/sites' 17 - 18 - export class FirehoseWorker { 19 - private firehose: Firehose | null = null 20 - private idResolver: IdResolver 21 - private isShuttingDown = false 22 - private lastEventTime = Date.now() 23 - private eventCount = 0 24 - private cacheCleanupInterval: NodeJS.Timeout | null = null 25 - private healthCheckInterval: NodeJS.Timeout | null = null 26 - private processingQueue: Set<Promise<void>> = new Set() 27 - private readonly maxConcurrency = parseInt(process.env.FIREHOSE_MAX_CONCURRENCY || '5', 10) 28 - 29 - constructor( 30 - private logger?: (msg: string, data?: Record<string, unknown>) => void 31 - ) { 32 - this.idResolver = new IdResolver() 33 - this.startCacheCleanup() 34 - } 35 - 36 - private log(msg: string, data?: Record<string, unknown>) { 37 - const log = this.logger || console.log 38 - log(`[FirehoseWorker] ${msg}`, data || {}) 39 - } 40 - 41 - /** 42 - * Queue a task with concurrency limiting 43 - * Waits if max concurrent tasks are already running 44 - */ 45 - private async queueTask(task: () => Promise<void>): Promise<void> { 46 - // Wait if we're at max concurrency 47 - if (this.processingQueue.size >= this.maxConcurrency) { 48 - this.log(`Queue at max capacity (${this.maxConcurrency}), waiting for slot...`, { 
49 - queueSize: this.processingQueue.size 50 - }) 51 - await Promise.race(this.processingQueue) 52 - } 53 - 54 - // Execute task and track in queue 55 - const promise = task() 56 - .catch(err => { 57 - // Errors are already logged in the handlers 58 - }) 59 - .finally(() => { 60 - this.processingQueue.delete(promise) 61 - }) 62 - 63 - this.processingQueue.add(promise) 64 - 65 - // Don't await here - we want handleEvent to return quickly 66 - // The task will process in the background with concurrency limiting 67 - } 68 - 69 - private startCacheCleanup() { 70 - // Clear IdResolver cache every hour to prevent unbounded memory growth 71 - // The IdResolver has an internal cache that never expires and can cause heap exhaustion 72 - this.cacheCleanupInterval = setInterval(() => { 73 - if (this.isShuttingDown) return 74 - 75 - this.log('Clearing IdResolver cache to prevent memory leak') 76 - 77 - // Recreate the IdResolver to clear its internal cache 78 - this.idResolver = new IdResolver() 79 - 80 - this.log('IdResolver cache cleared') 81 - }, 60 * 60 * 1000) // Every hour 82 - 83 - // Health check: log if no events received for 30 seconds 84 - this.healthCheckInterval = setInterval(() => { 85 - if (this.isShuttingDown) return 86 - 87 - const timeSinceLastEvent = Date.now() - this.lastEventTime 88 - if (timeSinceLastEvent > 30000 && this.eventCount === 0) { 89 - this.log('Warning: No firehose events received in the last 30 seconds', { 90 - timeSinceLastEvent, 91 - eventsReceived: this.eventCount 92 - }) 93 - } else if (timeSinceLastEvent > 60000) { 94 - this.log('Firehose status check', { 95 - timeSinceLastEvent, 96 - eventsReceived: this.eventCount 97 - }) 98 - } 99 - }, 30000) // Every 30 seconds 100 - } 101 - 102 - start() { 103 - this.log('Starting firehose worker') 104 - this.connect() 105 - } 106 - 107 - async stop() { 108 - this.log('Stopping firehose worker') 109 - this.isShuttingDown = true 110 - 111 - if (this.cacheCleanupInterval) { 112 - 
clearInterval(this.cacheCleanupInterval) 113 - this.cacheCleanupInterval = null 114 - } 115 - 116 - if (this.healthCheckInterval) { 117 - clearInterval(this.healthCheckInterval) 118 - this.healthCheckInterval = null 119 - } 120 - 121 - if (this.firehose) { 122 - this.firehose.destroy() 123 - this.firehose = null 124 - } 125 - 126 - // Wait for all queued tasks to complete 127 - if (this.processingQueue.size > 0) { 128 - this.log(`Waiting for ${this.processingQueue.size} queued tasks to complete...`) 129 - await Promise.all(this.processingQueue) 130 - this.log('All queued tasks completed') 131 - } 132 - } 133 - 134 - private connect() { 135 - if (this.isShuttingDown) return 136 - 137 - this.log('Connecting to AT Protocol firehose') 138 - 139 - this.firehose = new Firehose({ 140 - idResolver: this.idResolver, 141 - service: 'wss://bsky.network', 142 - filterCollections: ['place.wisp.fs', 'place.wisp.settings'], 143 - handleEvent: async (evt: any) => { 144 - this.lastEventTime = Date.now() 145 - this.eventCount++ 146 - 147 - if (this.eventCount === 1) { 148 - this.log('First firehose event received - connection established', { 149 - eventType: evt.event, 150 - collection: evt.collection 151 - }) 152 - } 153 - 154 - // Watch for write events 155 - if (evt.event === 'create' || evt.event === 'update') { 156 - const record = evt.record 157 - 158 - // If the write is a valid place.wisp.fs record 159 - if ( 160 - evt.collection === 'place.wisp.fs' && 161 - isRecord(record) 162 - // && validateRecord(record).success 163 - ) { 164 - this.log('Received place.wisp.fs event', { 165 - did: evt.did, 166 - event: evt.event, 167 - rkey: evt.rkey 168 - }) 169 - 170 - await this.queueTask(async () => { 171 - try { 172 - await this.handleCreateOrUpdate( 173 - evt.did, 174 - evt.rkey, 175 - record, 176 - evt.cid?.toString() 177 - ) 178 - } catch (err) { 179 - console.error('Full error details:', err); 180 - this.log('Error handling event', { 181 - did: evt.did, 182 - event: evt.event, 
183 - rkey: evt.rkey, 184 - error: 185 - err instanceof Error 186 - ? err.message 187 - : String(err) 188 - }) 189 - } 190 - }) 191 - } 192 - // Handle settings changes 193 - else if (evt.collection === 'place.wisp.settings') { 194 - this.log('Received place.wisp.settings event', { 195 - did: evt.did, 196 - event: evt.event, 197 - rkey: evt.rkey 198 - }) 199 - 200 - await this.queueTask(async () => { 201 - try { 202 - await this.handleSettingsChange(evt.did, evt.rkey) 203 - } catch (err) { 204 - this.log('Error handling settings change', { 205 - did: evt.did, 206 - event: evt.event, 207 - rkey: evt.rkey, 208 - error: 209 - err instanceof Error 210 - ? err.message 211 - : String(err) 212 - }) 213 - } 214 - }) 215 - } 216 - } else if ( 217 - evt.event === 'delete' && 218 - evt.collection === 'place.wisp.fs' 219 - ) { 220 - this.log('Received delete event', { 221 - did: evt.did, 222 - rkey: evt.rkey 223 - }) 224 - 225 - await this.queueTask(async () => { 226 - try { 227 - await this.handleDelete(evt.did, evt.rkey) 228 - } catch (err) { 229 - this.log('Error handling delete', { 230 - did: evt.did, 231 - rkey: evt.rkey, 232 - error: 233 - err instanceof Error ? err.message : String(err) 234 - }) 235 - } 236 - }) 237 - } else if ( 238 - evt.event === 'delete' && 239 - evt.collection === 'place.wisp.settings' 240 - ) { 241 - this.log('Received settings delete event', { 242 - did: evt.did, 243 - rkey: evt.rkey 244 - }) 245 - 246 - await this.queueTask(async () => { 247 - try { 248 - await this.handleSettingsChange(evt.did, evt.rkey) 249 - } catch (err) { 250 - this.log('Error handling settings delete', { 251 - did: evt.did, 252 - rkey: evt.rkey, 253 - error: 254 - err instanceof Error ? err.message : String(err) 255 - }) 256 - } 257 - }) 258 - } 259 - }, 260 - onError: (err: any) => { 261 - this.log('Firehose error', { 262 - error: err instanceof Error ? err.message : String(err), 263 - stack: err instanceof Error ? 
err.stack : undefined, 264 - fullError: err 265 - }) 266 - console.error('Full firehose error:', err) 267 - } 268 - }) 269 - 270 - this.firehose.start().catch((err: unknown) => { 271 - this.log('Fatal firehose error', { 272 - error: err instanceof Error ? err.message : String(err) 273 - }) 274 - console.error('Fatal firehose error:', err) 275 - }) 276 - this.log('Firehose starting') 277 - } 278 - 279 - private async handleCreateOrUpdate( 280 - did: string, 281 - site: string, 282 - record: any, 283 - eventCid?: string 284 - ) { 285 - console.log(`[Firehose] Processing create/update from firehose - ${did}:${site}`) 286 - this.log('Processing create/update', { did, site }) 287 - 288 - // Record is already validated in handleEvent 289 - const fsRecord = record 290 - 291 - const pdsEndpoint = await getPdsForDid(did) 292 - if (!pdsEndpoint) { 293 - this.log('Could not resolve PDS for DID', { did }) 294 - return 295 - } 296 - 297 - this.log('Resolved PDS', { did, pdsEndpoint }) 298 - 299 - // Verify record exists on PDS and fetch its CID 300 - this.log('Verifying record on PDS', { did, site }) 301 - let verifiedCid: string 302 - try { 303 - const result = await fetchSiteRecord(did, site) 304 - 305 - if (!result) { 306 - this.log('Record not found on PDS, skipping cache', { 307 - did, 308 - site 309 - }) 310 - return 311 - } 312 - 313 - verifiedCid = result.cid 314 - 315 - // Verify event CID matches PDS CID (prevent cache poisoning) 316 - if (eventCid && eventCid !== verifiedCid) { 317 - this.log('CID mismatch detected - potential spoofed event', { 318 - did, 319 - site, 320 - eventCid, 321 - verifiedCid 322 - }) 323 - return 324 - } 325 - 326 - this.log('Record verified on PDS', { did, site, cid: verifiedCid }) 327 - } catch (err) { 328 - this.log('Failed to verify record on PDS', { 329 - did, 330 - site, 331 - error: err instanceof Error ? 
err.message : String(err) 332 - }) 333 - return 334 - } 335 - 336 - // Invalidate in-memory caches before updating 337 - await invalidateSiteCache(did, site) 338 - 339 - // Mark site as being cached to prevent serving stale content during update 340 - markSiteAsBeingCached(did, site) 341 - 342 - try { 343 - // Cache the record with verified CID (uses atomic swap internally) 344 - // All instances cache locally for edge serving 345 - await downloadAndCacheSite( 346 - did, 347 - site, 348 - fsRecord, 349 - pdsEndpoint, 350 - verifiedCid 351 - ) 352 - 353 - // Clear redirect rules cache since the site was updated 354 - clearRedirectRulesCache(did, site) 355 - 356 - // Acquire distributed lock only for database write to prevent duplicate writes 357 - // Note: upsertSite will check cache-only mode internally and skip if needed 358 - const lockKey = `db:upsert:${did}:${site}` 359 - const lockAcquired = await tryAcquireLock(lockKey) 360 - 361 - if (!lockAcquired) { 362 - this.log('Another instance is writing to DB, skipping upsert', { 363 - did, 364 - site 365 - }) 366 - this.log('Successfully processed create/update (cached locally)', { 367 - did, 368 - site 369 - }) 370 - return 371 - } 372 - 373 - try { 374 - // Upsert site to database (only one instance does this) 375 - // In cache-only mode, this will be a no-op 376 - await upsertSite(did, site, fsRecord.site) 377 - this.log( 378 - 'Successfully processed create/update (cached + DB updated)', 379 - { did, site } 380 - ) 381 - } finally { 382 - // Always release lock, even if DB write fails 383 - await releaseLock(lockKey) 384 - } 385 - } finally { 386 - // Always unmark, even if caching fails 387 - unmarkSiteAsBeingCached(did, site) 388 - } 389 - } 390 - 391 - private async handleDelete(did: string, site: string) { 392 - this.log('Processing delete', { did, site }) 393 - 394 - // All instances should delete their local cache (no lock needed) 395 - const pdsEndpoint = await getPdsForDid(did) 396 - if (!pdsEndpoint) { 
397 - this.log('Could not resolve PDS for DID', { did }) 398 - return 399 - } 400 - 401 - // Verify record is actually deleted from PDS 402 - try { 403 - const recordUrl = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(site)}` 404 - const recordRes = await safeFetch(recordUrl) 405 - 406 - if (recordRes.ok) { 407 - this.log('Record still exists on PDS, not deleting cache', { 408 - did, 409 - site 410 - }) 411 - return 412 - } 413 - 414 - this.log('Verified record is deleted from PDS', { 415 - did, 416 - site, 417 - status: recordRes.status 418 - }) 419 - } catch (err) { 420 - this.log('Error verifying deletion on PDS', { 421 - did, 422 - site, 423 - error: err instanceof Error ? err.message : String(err) 424 - }) 425 - } 426 - 427 - // Invalidate all caches (tiered storage invalidation is handled by invalidateSiteCache) 428 - await invalidateSiteCache(did, site) 429 - 430 - this.log('Successfully processed delete', { did, site }) 431 - } 432 - 433 - private async handleSettingsChange(did: string, rkey: string) { 434 - this.log('Processing settings change', { did, rkey }) 435 - 436 - // Invalidate in-memory caches (includes metadata which stores settings) 437 - await invalidateSiteCache(did, rkey) 438 - 439 - // Check if site is already cached 440 - const cacheDir = `${CACHE_DIR}/${did}/${rkey}` 441 - const isCached = existsSync(cacheDir) 442 - 443 - if (!isCached) { 444 - this.log('Site not cached yet, checking if fs record exists', { did, rkey }) 445 - 446 - // If site exists on PDS, cache it (which will include the new settings) 447 - try { 448 - const siteRecord = await fetchSiteRecord(did, rkey) 449 - 450 - if (siteRecord) { 451 - this.log('Site record found, triggering full cache with settings', { did, rkey }) 452 - const pdsEndpoint = await getPdsForDid(did) 453 - 454 - if (pdsEndpoint) { 455 - // Mark as being cached 456 - markSiteAsBeingCached(did, rkey) 457 - 458 - try { 459 
- await downloadAndCacheSite(did, rkey, siteRecord.record, pdsEndpoint, siteRecord.cid) 460 - this.log('Successfully cached site with new settings', { did, rkey }) 461 - } finally { 462 - unmarkSiteAsBeingCached(did, rkey) 463 - } 464 - } else { 465 - this.log('Could not resolve PDS for DID', { did }) 466 - } 467 - } else { 468 - this.log('No fs record found for site, skipping cache', { did, rkey }) 469 - } 470 - } catch (err) { 471 - this.log('Failed to cache site after settings change', { 472 - did, 473 - rkey, 474 - error: err instanceof Error ? err.message : String(err) 475 - }) 476 - } 477 - 478 - this.log('Successfully processed settings change (new cache)', { did, rkey }) 479 - return 480 - } 481 - 482 - // Site is already cached, just update the settings in metadata 483 - try { 484 - const { fetchSiteSettings, updateCacheMetadataSettings } = await import('./utils') 485 - const settings = await fetchSiteSettings(did, rkey) 486 - await updateCacheMetadataSettings(did, rkey, settings) 487 - this.log('Updated cached settings', { did, rkey, hasSettings: !!settings }) 488 - } catch (err) { 489 - this.log('Failed to update cached settings', { 490 - did, 491 - rkey, 492 - error: err instanceof Error ? err.message : String(err) 493 - }) 494 - } 495 - 496 - this.log('Successfully processed settings change', { did, rkey }) 497 - } 498 - 499 - getHealth() { 500 - const isConnected = this.firehose !== null 501 - const timeSinceLastEvent = Date.now() - this.lastEventTime 502 - 503 - return { 504 - connected: isConnected, 505 - lastEventTime: this.lastEventTime, 506 - timeSinceLastEvent, 507 - queueSize: this.processingQueue.size, 508 - maxConcurrency: this.maxConcurrency, 509 - healthy: isConnected && timeSinceLastEvent < 300000 // 5 minutes 510 - } 511 - } 512 - }
+5 -10
apps/hosting-service/src/lib/redirects.ts
··· 1 - import { readFile } from 'fs/promises'; 2 - import { existsSync } from 'fs'; 3 1 import { parseRedirectsFile, type RedirectRule } from '@wispplace/fs-utils'; 2 + import { storage } from './storage'; 4 3 5 4 // Re-export everything from the shared package 6 5 export { ··· 17 16 * Load redirect rules from a cached site 18 17 */ 19 18 export async function loadRedirectRules(did: string, rkey: string): Promise<RedirectRule[]> { 20 - const CACHE_DIR = process.env.CACHE_DIR || './cache/sites'; 21 - const redirectsPath = `${CACHE_DIR}/${did}/${rkey}/_redirects`; 22 - 23 - if (!existsSync(redirectsPath)) { 24 - return []; 25 - } 26 - 19 + const key = `${did}/${rkey}/_redirects`; 27 20 try { 28 - const content = await readFile(redirectsPath, 'utf-8'); 21 + const data = await storage.get(key); 22 + if (!data) return []; 23 + const content = new TextDecoder().decode(data as Uint8Array); 29 24 return parseRedirectsFile(content); 30 25 } catch (err) { 31 26 console.error('Failed to load _redirects file', err);
-14
apps/hosting-service/src/lib/request-utils.ts
··· 3 3 */ 4 4 5 5 import type { Record as WispSettings } from '@wispplace/lexicons/types/place/wisp/settings'; 6 - import { access } from 'fs/promises'; 7 6 8 7 /** 9 8 * Default index file names to check for directory requests ··· 74 - /** 75 - * Async file existence check 76 - */ 77 - export async function fileExists(path: string): Promise<boolean> { 78 - try { 79 - await access(path); 80 - return true; 81 - } catch { 82 - return false; 83 - } 84 - } 85 - 86 73 /** 87 74 * Extract and normalize headers from request 88 75 */ ··· 93 80 }); 94 81 return headers; 95 82 } 96 -
+49
apps/hosting-service/src/lib/revalidate-metrics.ts
··· 1 + type EnqueueResult = 'enqueued' | 'deduped' | 'disabled' | 'error'; 2 + 3 + interface RevalidateMetrics { 4 + storageMissExpected: number; 5 + revalidateEnqueued: number; 6 + revalidateDeduped: number; 7 + revalidateDisabled: number; 8 + revalidateErrors: number; 9 + lastStorageMissAt: number | null; 10 + lastStorageMissPath: string | null; 11 + } 12 + 13 + const metrics: RevalidateMetrics = { 14 + storageMissExpected: 0, 15 + revalidateEnqueued: 0, 16 + revalidateDeduped: 0, 17 + revalidateDisabled: 0, 18 + revalidateErrors: 0, 19 + lastStorageMissAt: null, 20 + lastStorageMissPath: null, 21 + }; 22 + 23 + export function recordStorageMiss(path: string): void { 24 + metrics.storageMissExpected += 1; 25 + metrics.lastStorageMissAt = Date.now(); 26 + metrics.lastStorageMissPath = path; 27 + } 28 + 29 + export function recordRevalidateResult(result: EnqueueResult): void { 30 + if (result === 'enqueued') { 31 + metrics.revalidateEnqueued += 1; 32 + return; 33 + } 34 + if (result === 'deduped') { 35 + metrics.revalidateDeduped += 1; 36 + return; 37 + } 38 + if (result === 'disabled') { 39 + metrics.revalidateDisabled += 1; 40 + return; 41 + } 42 + if (result === 'error') { 43 + metrics.revalidateErrors += 1; 44 + } 45 + } 46 + 47 + export function getRevalidateMetrics(): RevalidateMetrics { 48 + return { ...metrics }; 49 + }
+83
apps/hosting-service/src/lib/revalidate-queue.ts
··· 1 + import Redis from 'ioredis'; 2 + import { recordRevalidateResult } from './revalidate-metrics'; 3 + 4 + const redisUrl = process.env.REDIS_URL; 5 + const streamName = process.env.WISP_REVALIDATE_STREAM || 'wisp:revalidate'; 6 + const dedupeTtlSeconds = Number.parseInt(process.env.WISP_REVALIDATE_DEDUPE_TTL_SECONDS || '60', 10); 7 + 8 + let client: Redis | null = null; 9 + let loggedMissingRedis = false; 10 + 11 + function getRedisClient(): Redis | null { 12 + if (!redisUrl) { 13 + if (!loggedMissingRedis) { 14 + console.warn('[Revalidate] REDIS_URL not set; skipping queue enqueue'); 15 + loggedMissingRedis = true; 16 + } 17 + return null; 18 + } 19 + 20 + if (!client) { 21 + client = new Redis(redisUrl, { 22 + maxRetriesPerRequest: 2, 23 + enableReadyCheck: true, 24 + }); 25 + 26 + client.on('error', (err) => { 27 + console.error('[Revalidate] Redis error:', err); 28 + }); 29 + } 30 + 31 + return client; 32 + } 33 + 34 + export type EnqueueResult = 'enqueued' | 'deduped' | 'disabled' | 'error'; 35 + 36 + export async function enqueueRevalidate( 37 + did: string, 38 + rkey: string, 39 + reason: string 40 + ): Promise<{ enqueued: boolean; result: EnqueueResult }> { 41 + const redis = getRedisClient(); 42 + if (!redis) { 43 + recordRevalidateResult('disabled'); 44 + return { enqueued: false, result: 'disabled' }; 45 + } 46 + 47 + try { 48 + const dedupeKey = `revalidate:site:${did}:${rkey}`; 49 + const set = await redis.set(dedupeKey, '1', 'NX', 'EX', dedupeTtlSeconds); 50 + if (!set) { 51 + recordRevalidateResult('deduped'); 52 + return { enqueued: false, result: 'deduped' }; 53 + } 54 + 55 + await redis.xadd( 56 + streamName, 57 + '*', 58 + 'did', 59 + did, 60 + 'rkey', 61 + rkey, 62 + 'reason', 63 + reason, 64 + 'ts', 65 + Date.now().toString() 66 + ); 67 + 68 + recordRevalidateResult('enqueued'); 69 + return { enqueued: true, result: 'enqueued' }; 70 + } catch (err) { 71 + recordRevalidateResult('error'); 72 + console.error('[Revalidate] Failed to 
enqueue', { did, rkey, reason, error: err }); 73 + return { enqueued: false, result: 'error' }; 74 + } 75 + } 76 + 77 + export async function closeRevalidateQueue(): Promise<void> { 78 + if (client) { 79 + const toClose = client; 80 + client = null; 81 + await toClose.quit(); 82 + } 83 + }
+2 -50
apps/hosting-service/src/lib/site-cache.ts
··· 1 1 /** 2 - * Site caching management utilities 2 + * Redirect rules cache utilities 3 3 */ 4 4 5 - import { createLogger } from '@wispplace/observability'; 6 - import { fetchSiteRecord, getPdsForDid, downloadAndCacheSite, isCached } from './utils'; 7 - import { markSiteAsBeingCached, unmarkSiteAsBeingCached, LRUCache } from './cache'; 5 + import { LRUCache } from './cache'; 8 6 import type { RedirectRule } from './redirects'; 9 - 10 - const logger = createLogger('hosting-service'); 11 7 12 8 // Cache for redirect rules (per site) - LRU with 1000 site limit 13 9 // Each entry is relatively small (array of redirect rules), so 1000 sites should be < 10MB ··· 39 35 const estimatedSize = rules.length * 100; 40 36 redirectRulesCache.set(cacheKey, rules, estimatedSize); 41 37 } 42 - 43 - /** 44 - * Helper to ensure site is cached 45 - * Returns true if site is successfully cached, false otherwise 46 - */ 47 - export async function ensureSiteCached(did: string, rkey: string): Promise<boolean> { 48 - if (await isCached(did, rkey)) { 49 - console.log(`[Cache Hit] Site already cached - ${did}:${rkey}`); 50 - return true; 51 - } 52 - 53 - // Fetch and cache the site 54 - console.log(`[On-Demand] Caching site on first request - ${did}:${rkey}`); 55 - const siteData = await fetchSiteRecord(did, rkey); 56 - if (!siteData) { 57 - logger.error('Site record not found', null, { did, rkey }); 58 - return false; 59 - } 60 - 61 - const pdsEndpoint = await getPdsForDid(did); 62 - if (!pdsEndpoint) { 63 - logger.error('PDS not found for DID', null, { did }); 64 - return false; 65 - } 66 - 67 - // Mark site as being cached to prevent serving stale content during update 68 - markSiteAsBeingCached(did, rkey); 69 - 70 - try { 71 - await downloadAndCacheSite(did, rkey, siteData.record, pdsEndpoint, siteData.cid); 72 - // Clear redirect rules cache since the site was updated 73 - clearRedirectRulesCache(did, rkey); 74 - logger.info('Site cached successfully (on-demand)', { did, rkey }); 75 
- console.log(`[On-Demand] Successfully cached ${did}:${rkey}`); 76 - return true; 77 - } catch (err) { 78 - logger.error('Failed to cache site on-demand', err, { did, rkey }); 79 - return false; 80 - } finally { 81 - // Always unmark, even if caching fails 82 - unmarkSiteAsBeingCached(did, rkey); 83 - } 84 - } 85 -
+10 -18
apps/hosting-service/src/lib/storage.ts
··· 7 7 * - Cold (S3/R2): Object storage as source of truth (optional) 8 8 * 9 9 * When S3 is not configured, falls back to disk-only mode (warm tier acts as source of truth). 10 - * In cache-only mode (non-master nodes), S3 writes are skipped even if configured. 10 + * Hosting service is read-only: S3 writes are always skipped. 11 11 */ 12 12 13 13 import { ··· 24 24 const HOT_CACHE_COUNT = parseInt(process.env.HOT_CACHE_COUNT || '500', 10); 25 25 const WARM_CACHE_SIZE = parseInt(process.env.WARM_CACHE_SIZE || '10737418240', 10); // 10GB default 26 26 const WARM_EVICTION_POLICY = (process.env.WARM_EVICTION_POLICY || 'lru') as 'lru' | 'fifo' | 'size'; 27 - 28 - // Cache-only mode: skip S3 writes (non-master nodes) 29 - // This is the same flag used to skip database writes 30 - const CACHE_ONLY_MODE = process.env.CACHE_ONLY_MODE === 'true'; 31 27 32 28 // S3/Cold tier configuration (optional) 33 29 const S3_BUCKET = process.env.S3_BUCKET || ''; ··· 55 51 }; 56 52 57 53 /** 58 - * Read-only wrapper for S3 tier in cache-only mode. 59 - * Allows reads from S3 but skips all writes (for non-master nodes). 54 + * Read-only wrapper for S3 tier. 55 + * Allows reads from S3 but skips all writes (hosting-service is read-only). 
60 56 */ 61 57 class ReadOnlyS3Tier implements StorageTier { 62 58 private static hasLoggedWriteSkip = false; ··· 92 88 return this.tier.getStats(); 93 89 } 94 90 95 - // Write operations - no-op in cache-only mode 91 + // Write operations - no-op in read-only mode 96 92 async set(key: string, _data: Uint8Array, _metadata: StorageMetadata) { 97 93 this.logWriteSkip('set', key); 98 94 } ··· 120 116 private logWriteSkip(operation: string, key: string) { 121 117 // Only log once to avoid spam 122 118 if (!ReadOnlyS3Tier.hasLoggedWriteSkip) { 123 - console.log(`[Storage] Cache-only mode: skipping S3 writes (operation: ${operation})`); 119 + console.log(`[Storage] Read-only mode: skipping S3 writes (operation: ${operation})`); 124 120 ReadOnlyS3Tier.hasLoggedWriteSkip = true; 125 121 } 126 122 } ··· 157 153 prefix: S3_PREFIX, 158 154 }); 159 155 160 - // In cache-only mode, wrap S3 tier to make it read-only 161 - coldTier = CACHE_ONLY_MODE ? new ReadOnlyS3Tier(s3Tier) : s3Tier; 156 + // Hosting service is read-only: always wrap S3 tier to make it read-only 157 + coldTier = new ReadOnlyS3Tier(s3Tier); 162 158 warmTier = diskTier; 163 159 164 - if (CACHE_ONLY_MODE) { 165 - console.log('[Storage] Cache-only mode: S3 as read-only cold tier (no writes), disk as warm tier'); 166 - } else { 167 - console.log('[Storage] Using S3 as cold tier, disk as warm tier'); 168 - } 160 + console.log('[Storage] Read-only mode: S3 as cold tier (no writes), disk as warm tier'); 169 161 } else { 170 162 // Disk-only mode: disk tier acts as source of truth (cold) 171 163 coldTier = diskTier; ··· 190 182 191 183 // Placement rules: determine which tiers each file goes to 192 184 placementRules: [ 193 - // Metadata is critical: frequently accessed for cache validity checks 185 + // Rewritten HTML: keep hot for fast serving 194 186 { 195 - pattern: '**/.metadata.json', 187 + pattern: '**/.rewritten/**/*.html', 196 188 tiers: ['hot', 'warm', 'cold'], 197 189 }, 198 190
+15 -438
apps/hosting-service/src/lib/utils.ts
··· 1 - import type { Record as WispFsRecord, Directory, Entry, File } from '@wispplace/lexicons/types/place/wisp/fs'; 1 + import type { Directory, Entry } from '@wispplace/lexicons/types/place/wisp/fs'; 2 2 import type { Record as SubfsRecord } from '@wispplace/lexicons/types/place/wisp/subfs'; 3 3 import type { Record as WispSettings } from '@wispplace/lexicons/types/place/wisp/settings'; 4 - import { existsSync, mkdirSync, readFileSync, rmSync } from 'fs'; 5 - import { writeFile, readFile, rename } from 'fs/promises'; 6 - import { Readable } from 'stream'; 7 - import { safeFetchJson, safeFetchBlob } from '@wispplace/safe-fetch'; 8 - import { CID } from 'multiformats'; 4 + import { safeFetchJson } from '@wispplace/safe-fetch'; 9 5 import { extractBlobCid, resolveDid, getPdsForDid, didWebToHttps } from '@wispplace/atproto-utils'; 10 - import { sanitizePath, collectFileCidsFromEntries, countFilesInDirectory } from '@wispplace/fs-utils'; 11 - import { shouldCompressMimeType } from '@wispplace/atproto-utils/compression'; 12 - import { MAX_BLOB_SIZE, MAX_FILE_COUNT, MAX_SITE_SIZE } from '@wispplace/constants'; 13 - import { storage } from './storage'; 6 + import { sanitizePath } from '@wispplace/fs-utils'; 7 + import { getSiteSettingsCache } from './db'; 14 8 15 9 // Re-export shared utilities for local usage and tests 16 10 export { extractBlobCid, sanitizePath, resolveDid, getPdsForDid, didWebToHttps }; 17 - 18 - const CACHE_DIR = process.env.CACHE_DIR || './cache/sites'; 19 - const CACHE_TTL = 14 * 24 * 60 * 60 * 1000; // 14 days cache TTL 20 - 21 - interface CacheMetadata { 22 - recordCid: string; 23 - cachedAt: number; 24 - did: string; 25 - rkey: string; 26 - // Map of file path to blob CID for incremental updates 27 - fileCids?: Record<string, string>; 28 - // Site settings (null = explicitly no settings, undefined = not yet checked) 29 - settings?: WispSettings | null; 30 - } 31 - 32 - 33 - export async function fetchSiteRecord(did: string, rkey: string): 
Promise<{ record: WispFsRecord; cid: string } | null> { 34 - try { 35 - const pdsEndpoint = await getPdsForDid(did); 36 - if (!pdsEndpoint) { 37 - console.error('[hosting-service] Failed to get PDS endpoint for DID', { did, rkey }); 38 - return null; 39 - } 40 - 41 - const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`; 42 - const data = await safeFetchJson(url); 43 - 44 - return { 45 - record: data.value as WispFsRecord, 46 - cid: data.cid || '' 47 - }; 48 - } catch (err) { 49 - const errorCode = (err as any)?.code; 50 - const errorMsg = err instanceof Error ? err.message : String(err); 51 - 52 - // Better error logging to distinguish between network errors and 404s 53 - if (errorMsg.includes('HTTP 404') || errorMsg.includes('Not Found')) { 54 - console.log('[hosting-service] Site record not found', { did, rkey }); 55 - } else if (errorCode && ['ECONNRESET', 'ERR_SSL_TLSV1_ALERT_INTERNAL_ERROR', 'ETIMEDOUT'].includes(errorCode)) { 56 - console.error('[hosting-service] Network/SSL error fetching site record (after retries)', { 57 - did, 58 - rkey, 59 - error: errorMsg, 60 - code: errorCode 61 - }); 62 - } else { 63 - console.error('[hosting-service] Failed to fetch site record', { 64 - did, 65 - rkey, 66 - error: errorMsg, 67 - code: errorCode 68 - }); 69 - } 70 - 71 - return null; 72 - } 73 - } 74 - 75 - export async function fetchSiteSettings(did: string, rkey: string): Promise<WispSettings | null> { 76 - try { 77 - const pdsEndpoint = await getPdsForDid(did); 78 - if (!pdsEndpoint) return null; 79 - 80 - const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.settings&rkey=${encodeURIComponent(rkey)}`; 81 - const data = await safeFetchJson(url); 82 - 83 - return data.value as WispSettings; 84 - } catch (err) { 85 - // Settings are optional, so return null if not found 86 - return null; 87 - } 88 - } 89 - 90 - 
/** 91 - * Calculate total size of all blobs in a directory tree from manifest metadata 92 - */ 93 - function calculateTotalBlobSize(directory: Directory): number { 94 - let totalSize = 0; 95 - 96 - function sumBlobSizes(entries: Entry[]) { 97 - for (const entry of entries) { 98 - const node = entry.node; 99 - 100 - if ('type' in node && node.type === 'directory' && 'entries' in node) { 101 - // Recursively sum subdirectories 102 - sumBlobSizes(node.entries); 103 - } else if ('type' in node && node.type === 'file' && 'blob' in node) { 104 - // Add blob size from manifest 105 - const fileNode = node as File; 106 - const blobSize = (fileNode.blob as any)?.size || 0; 107 - totalSize += blobSize; 108 - } 109 - } 110 - } 111 - 112 - sumBlobSizes(directory.entries); 113 - return totalSize; 114 - } 115 11 116 12 /** 117 13 * Extract all subfs URIs from a directory tree with their mount paths ··· 298 194 } 299 195 300 196 301 - export async function downloadAndCacheSite(did: string, rkey: string, record: WispFsRecord, pdsEndpoint: string, recordCid: string): Promise<void> { 302 - console.log('Caching site', did, rkey); 197 + export async function getCachedSettings(did: string, rkey: string): Promise<WispSettings | null> { 198 + const cached = await getSiteSettingsCache(did, rkey); 199 + if (!cached) return null; 303 200 304 - if (!record.root) { 305 - console.error('Record missing root directory:', JSON.stringify(record, null, 2)); 306 - throw new Error('Invalid record structure: missing root directory'); 307 - } 308 - 309 - if (!record.root.entries || !Array.isArray(record.root.entries)) { 310 - console.error('Record root missing entries array:', JSON.stringify(record.root, null, 2)); 311 - throw new Error('Invalid record structure: root missing entries array'); 312 - } 313 - 314 - // Expand subfs nodes before caching 315 - const expandedRoot = await expandSubfsNodes(record.root, pdsEndpoint); 316 - 317 - // Verify all subfs nodes were expanded 318 - const remainingSubfs 
= extractSubfsUris(expandedRoot); 319 - if (remainingSubfs.length > 0) { 320 - console.warn(`[Cache] Warning: ${remainingSubfs.length} subfs nodes remain unexpanded after expansion`, remainingSubfs); 321 - } 322 - 323 - // Validate file count limit 324 - const fileCount = countFilesInDirectory(expandedRoot); 325 - if (fileCount > MAX_FILE_COUNT) { 326 - throw new Error(`Site exceeds file count limit: ${fileCount} files (max ${MAX_FILE_COUNT})`); 327 - } 328 - console.log(`[Cache] File count validation passed: ${fileCount} files (limit: ${MAX_FILE_COUNT})`); 329 - 330 - // Validate total size from blob metadata 331 - const totalBlobSize = calculateTotalBlobSize(expandedRoot); 332 - if (totalBlobSize > MAX_SITE_SIZE) { 333 - throw new Error(`Site exceeds size limit: ${(totalBlobSize / 1024 / 1024).toFixed(2)}MB (max ${(MAX_SITE_SIZE / 1024 / 1024).toFixed(0)}MB)`); 334 - } 335 - console.log(`[Cache] Size validation passed: ${(totalBlobSize / 1024 / 1024).toFixed(2)}MB (limit: ${(MAX_SITE_SIZE / 1024 / 1024).toFixed(0)}MB)`); 336 - 337 - // Get existing cache metadata to check for incremental updates 338 - const existingMetadata = await getCacheMetadata(did, rkey); 339 - const existingFileCids = existingMetadata?.fileCids || {}; 340 - 341 - // Collect file CIDs from the new record (using expanded root) 342 - const newFileCids: Record<string, string> = {}; 343 - collectFileCidsFromEntries(expandedRoot.entries, '', newFileCids); 344 - 345 - // Fetch site settings (optional) 346 - const settings = await fetchSiteSettings(did, rkey); 347 - 348 - // Determine if this is an incremental update or full cache 349 - const isIncremental = Object.keys(existingFileCids).length > 0; 350 - const updateType = isIncremental ? 
'incremental update' : 'full cache'; 351 - console.log(`[Cache] Starting ${updateType} for ${did}:${rkey}`); 352 - 353 - // Download files directly to tiered storage (with incremental logic) 354 - await cacheFiles(did, rkey, expandedRoot.entries, pdsEndpoint, '', existingFileCids); 355 - await saveCacheMetadata(did, rkey, recordCid, newFileCids, settings); 356 - 357 - console.log(`[Cache] Successfully cached site ${did}:${rkey} (${updateType})`); 358 - } 359 - 360 - 361 - async function cacheFiles( 362 - did: string, 363 - site: string, 364 - entries: Entry[], 365 - pdsEndpoint: string, 366 - pathPrefix: string, 367 - existingFileCids: Record<string, string> = {} 368 - ): Promise<void> { 369 - // Collect file download tasks (skip unchanged files) 370 - const downloadTasks: Array<() => Promise<void>> = []; 371 - let skippedCount = 0; 372 - 373 - function collectFileTasks( 374 - entries: Entry[], 375 - currentPathPrefix: string 376 - ) { 377 - for (const entry of entries) { 378 - const currentPath = currentPathPrefix ? 
`${currentPathPrefix}/${entry.name}` : entry.name; 379 - const node = entry.node; 380 - 381 - if ('type' in node && node.type === 'directory' && 'entries' in node) { 382 - collectFileTasks(node.entries, currentPath); 383 - } else if ('type' in node && node.type === 'file' && 'blob' in node) { 384 - const fileNode = node as File; 385 - const cid = extractBlobCid(fileNode.blob); 386 - 387 - // Check if file is unchanged (same CID as existing cache) 388 - if (cid && existingFileCids[currentPath] === cid) { 389 - // File unchanged - skip download (already in tiered storage) 390 - skippedCount++; 391 - } else { 392 - // File new or changed - download it 393 - downloadTasks.push(() => cacheFileBlob( 394 - did, 395 - site, 396 - currentPath, 397 - fileNode.blob, 398 - pdsEndpoint, 399 - fileNode.encoding, 400 - fileNode.mimeType, 401 - fileNode.base64 402 - )); 403 - } 404 - } 405 - } 406 - } 407 - 408 - collectFileTasks(entries, pathPrefix); 409 - 410 - console.log(`[Incremental Update] Files to copy: ${skippedCount}, Files to download: ${downloadTasks.length}`); 411 - 412 - // Download new/changed files concurrently 413 - const downloadLimit = 20; 414 - let successCount = 0; 415 - let failureCount = 0; 416 - 417 - for (let i = 0; i < downloadTasks.length; i += downloadLimit) { 418 - const batch = downloadTasks.slice(i, i + downloadLimit); 419 - const results = await Promise.allSettled(batch.map(task => task())); 420 - 421 - // Count successes and failures 422 - results.forEach((result, index) => { 423 - if (result.status === 'fulfilled') { 424 - successCount++; 425 - } else { 426 - failureCount++; 427 - console.error(`[Cache] Failed to download file (continuing with others):`, result.reason); 428 - } 429 - }); 430 - 431 - if (downloadTasks.length > downloadLimit) { 432 - console.log(`[Cache Progress] Downloaded ${Math.min(i + downloadLimit, downloadTasks.length)}/${downloadTasks.length} files (${failureCount} failed)`); 433 - } 434 - } 435 - 436 - if (failureCount > 0) 
{ 437 - console.warn(`[Cache] Completed with ${successCount} successful and ${failureCount} failed file downloads`); 438 - } 439 - } 440 - 441 - async function cacheFileBlob( 442 - did: string, 443 - site: string, 444 - filePath: string, 445 - blobRef: any, 446 - pdsEndpoint: string, 447 - encoding?: 'gzip', 448 - mimeType?: string, 449 - base64?: boolean 450 - ): Promise<void> { 451 - const cid = extractBlobCid(blobRef); 452 - if (!cid) { 453 - console.error('Could not extract CID from blob', blobRef); 454 - return; 455 - } 456 - 457 - const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`; 458 - 459 - console.log(`[Cache] Fetching blob for file: ${filePath}, CID: ${cid}`); 460 - 461 - let content = await safeFetchBlob(blobUrl, { maxSize: MAX_BLOB_SIZE, timeout: 300000 }); 462 - 463 - // If content is base64-encoded, decode it back to raw binary (gzipped or not) 464 - if (base64) { 465 - // Decode base64 directly from raw bytes - no string conversion 466 - // The blob contains base64-encoded text as raw bytes, decode it in-place 467 - const textDecoder = new TextDecoder(); 468 - const base64String = textDecoder.decode(content); 469 - content = Buffer.from(base64String, 'base64'); 470 - } 471 - 472 - // Use the shared function to determine if this should remain compressed 473 - const shouldStayCompressed = shouldCompressMimeType(mimeType); 474 - 475 - // Decompress files that shouldn't be stored compressed 476 - if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 && 477 - content[0] === 0x1f && content[1] === 0x8b) { 478 - try { 479 - const { gunzipSync } = await import('zlib'); 480 - const decompressed = gunzipSync(content); 481 - content = decompressed; 482 - // Clear the encoding flag since we're storing decompressed 483 - encoding = undefined; 484 - } catch (error) { 485 - console.error(`Failed to decompress ${filePath}, storing original gzipped content:`, error); 486 - } 
487 - } 488 - 489 - // Write to tiered storage with metadata 490 - const stream = Readable.from([content]); 491 - const key = `${did}/${site}/${filePath}`; 492 - 493 - // Build metadata object, only including defined values 494 - const customMetadata: Record<string, string> = {}; 495 - if (encoding) customMetadata.encoding = encoding; 496 - if (mimeType) customMetadata.mimeType = mimeType; 497 - 498 - await storage.setStream(key, stream, { 499 - size: content.length, 500 - skipTiers: ['hot'], // Don't put in memory on ingest, only on access 501 - metadata: customMetadata, 502 - }); 503 - 504 - // Log completion with tier info 505 - const tierInfo = 'to warm/cold tiers'; 506 - if (encoding === 'gzip' && mimeType) { 507 - console.log(`[Cache] Stored ${filePath} ${tierInfo} (${content.length} bytes, gzipped, ${mimeType})`); 508 - } else { 509 - console.log(`[Cache] Stored ${filePath} ${tierInfo} (${content.length} bytes)`); 510 - } 511 - } 512 - 513 - 514 - export function getCachedFilePath(did: string, site: string, filePath: string): string { 515 - const sanitizedPath = sanitizePath(filePath); 516 - return `${CACHE_DIR}/${did}/${site}/${sanitizedPath}`; 517 - } 518 - 519 - /** 520 - * Check if a site exists in any tier of the cache (without checking metadata) 521 - * This is a quick existence check - for actual retrieval, use storage.get() 522 - */ 523 - export async function isCached(did: string, site: string): Promise<boolean> { 524 - // Check if any file exists for this site by checking for the index.html 525 - // If index.html exists, the site is cached 526 - const indexKey = `${did}/${site}/index.html`; 527 - return await storage.exists(indexKey); 528 - } 529 - 530 - async function saveCacheMetadata(did: string, rkey: string, recordCid: string, fileCids?: Record<string, string>, settings?: WispSettings | null): Promise<void> { 531 - const metadata: CacheMetadata = { 532 - recordCid, 533 - cachedAt: Date.now(), 534 - did, 535 - rkey, 536 - fileCids, 537 - 
settings: settings || undefined 201 + return { 202 + $type: 'place.wisp.settings', 203 + directoryListing: cached.directory_listing, 204 + spaMode: cached.spa_mode ?? undefined, 205 + custom404: cached.custom_404 ?? undefined, 206 + indexFiles: cached.index_files ?? undefined, 207 + cleanUrls: cached.clean_urls, 208 + headers: cached.headers ?? undefined, 538 209 }; 539 - 540 - // Store through tiered storage for persistence to S3/cold tier 541 - const metadataKey = `${did}/${rkey}/.metadata.json`; 542 - const metadataBytes = new TextEncoder().encode(JSON.stringify(metadata, null, 2)); 543 - await storage.set(metadataKey, metadataBytes); 544 - } 545 - 546 - async function getCacheMetadata(did: string, rkey: string): Promise<CacheMetadata | null> { 547 - try { 548 - // Retrieve metadata from tiered storage 549 - const metadataKey = `${did}/${rkey}/.metadata.json`; 550 - const data = await storage.get(metadataKey); 551 - 552 - if (!data) return null; 553 - 554 - // Deserialize from Uint8Array to JSON (storage uses identity serialization) 555 - const jsonString = new TextDecoder().decode(data as Uint8Array); 556 - return JSON.parse(jsonString) as CacheMetadata; 557 - } catch (err) { 558 - console.error('Failed to read cache metadata', err); 559 - return null; 560 - } 561 - } 562 - 563 - export async function getCachedSettings(did: string, rkey: string): Promise<WispSettings | null> { 564 - const metadata = await getCacheMetadata(did, rkey); 565 - 566 - // If metadata has settings (including explicit null for "no settings"), return them 567 - if (metadata && 'settings' in metadata) { 568 - return metadata.settings ?? 
null; 569 - } 570 - 571 - // If metadata exists but has never checked for settings, try to fetch from PDS and update cache 572 - if (metadata) { 573 - console.log('[Cache] Metadata missing settings, fetching from PDS', { did, rkey }); 574 - try { 575 - const settings = await fetchSiteSettings(did, rkey); 576 - // Update cache with settings (or null if none found) 577 - // This caches the "no settings" state to avoid repeated PDS fetches 578 - await updateCacheMetadataSettings(did, rkey, settings); 579 - console.log('[Cache] Updated metadata with fetched settings', { did, rkey, hasSettings: !!settings }); 580 - return settings; 581 - } catch (err) { 582 - console.error('[Cache] Failed to fetch/update settings', { did, rkey, err }); 583 - } 584 - } 585 - 586 - return null; 587 - } 588 - 589 - export async function updateCacheMetadataSettings(did: string, rkey: string, settings: WispSettings | null): Promise<void> { 590 - try { 591 - // Read existing metadata from tiered storage 592 - const metadata = await getCacheMetadata(did, rkey); 593 - 594 - if (!metadata) { 595 - console.warn('Metadata does not exist, cannot update settings', { did, rkey }); 596 - return; 597 - } 598 - 599 - // Update settings field 600 - // Store null explicitly to cache "no settings" state and avoid repeated fetches 601 - metadata.settings = settings ?? 
null; 602 - 603 - // Write back through tiered storage 604 - // Convert to Uint8Array since storage is typed for binary data 605 - const metadataKey = `${did}/${rkey}/.metadata.json`; 606 - const metadataBytes = new TextEncoder().encode(JSON.stringify(metadata, null, 2)); 607 - await storage.set(metadataKey, metadataBytes); 608 - console.log('Updated metadata settings', { did, rkey, hasSettings: !!settings }); 609 - } catch (err) { 610 - console.error('Failed to update metadata settings', err); 611 - throw err; 612 - } 613 - } 614 - 615 - export async function isCacheValid(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> { 616 - const metadata = await getCacheMetadata(did, rkey); 617 - if (!metadata) return false; 618 - 619 - // Check if cache has expired (14 days TTL) 620 - const cacheAge = Date.now() - metadata.cachedAt; 621 - if (cacheAge > CACHE_TTL) { 622 - console.log('[Cache] Cache expired for', did, rkey); 623 - return false; 624 - } 625 - 626 - // If current CID is provided, verify it matches 627 - if (currentRecordCid && metadata.recordCid !== currentRecordCid) { 628 - console.log('[Cache] CID mismatch for', did, rkey, 'cached:', metadata.recordCid, 'current:', currentRecordCid); 629 - return false; 630 - } 631 - 632 - return true; 633 210 }
+6 -49
apps/hosting-service/src/server.ts
··· 10 10 import { logCollector, errorTracker, metricsCollector } from '@wispplace/observability'; 11 11 import { observabilityMiddleware, observabilityErrorHandler } from '@wispplace/observability/middleware/hono'; 12 12 import { sanitizePath } from '@wispplace/fs-utils'; 13 - import { isSiteBeingCached } from './lib/cache'; 14 13 import { isValidRkey, extractHeaders } from './lib/request-utils'; 15 - import { siteUpdatingResponse } from './lib/page-generators'; 16 - import { ensureSiteCached } from './lib/site-cache'; 17 14 import { serveFromCache, serveFromCacheWithRewrite } from './lib/file-serving'; 15 + import { getRevalidateMetrics } from './lib/revalidate-metrics'; 18 16 19 17 const BASE_HOST = process.env.BASE_HOST || 'wisp.place'; 20 18 ··· 40 38 app.get('/*', async (c) => { 41 39 const url = new URL(c.req.url); 42 40 const hostname = c.req.header('host') || ''; 41 + const hostnameWithoutPort = hostname.split(':')[0]; 43 42 const rawPath = url.pathname.replace(/^\//, ''); 44 43 const path = sanitizePath(rawPath); 45 44 46 45 // Check if this is sites.wisp.place subdomain (strip port for comparison) 47 - const hostnameWithoutPort = hostname.split(':')[0]; 48 46 if (hostnameWithoutPort === `sites.${BASE_HOST}`) { 49 47 // Sanitize the path FIRST to prevent path traversal 50 48 const sanitizedFullPath = sanitizePath(rawPath); ··· 82 80 83 81 console.log(`[Server] sites.wisp.place request: identifier=${identifier}, site=${site}, filePath=${filePath}`); 84 82 85 - // Check if site is currently being cached - return updating response early 86 - if (isSiteBeingCached(did, site)) { 87 - return siteUpdatingResponse(); 88 - } 89 - 90 - // Ensure site is cached 91 - const cached = await ensureSiteCached(did, site); 92 - if (!cached) { 93 - return c.text('Site not found', 404); 94 - } 95 - 96 83 // Serve with HTML path rewriting to handle absolute paths 97 84 const basePath = `/${identifier}/${site}/`; 98 85 console.log(`[Server] Serving with basePath: ${basePath}`); 
··· 128 115 return c.text('Invalid site configuration', 500); 129 116 } 130 117 131 - // Check if site is currently being cached - return updating response early 132 - if (isSiteBeingCached(customDomain.did, rkey)) { 133 - return siteUpdatingResponse(); 134 - } 135 - 136 - const cached = await ensureSiteCached(customDomain.did, rkey); 137 - if (!cached) { 138 - return c.text('Site not found', 404); 139 - } 140 - 141 118 const headers = extractHeaders(c.req.raw.headers); 142 119 return serveFromCache(customDomain.did, rkey, path, c.req.url, headers); 143 120 } 144 121 145 122 // Route 2: Registered subdomains - /*.wisp.place/* 146 - if (hostname.endsWith(`.${BASE_HOST}`)) { 147 - const domainInfo = await getWispDomain(hostname); 123 + if (hostnameWithoutPort.endsWith(`.${BASE_HOST}`)) { 124 + const domainInfo = await getWispDomain(hostnameWithoutPort); 148 125 if (!domainInfo) { 149 126 return c.text('Subdomain not registered', 404); 150 127 } ··· 158 135 return c.text('Invalid site configuration', 500); 159 136 } 160 137 161 - // Check if site is currently being cached - return updating response early 162 - if (isSiteBeingCached(domainInfo.did, rkey)) { 163 - return siteUpdatingResponse(); 164 - } 165 - 166 - const cached = await ensureSiteCached(domainInfo.did, rkey); 167 - if (!cached) { 168 - return c.text('Site not found', 404); 169 - } 170 - 171 138 const headers = extractHeaders(c.req.raw.headers); 172 139 return serveFromCache(domainInfo.did, rkey, path, c.req.url, headers); 173 140 } 174 141 175 142 // Route 1: Custom domains - /* 176 - const customDomain = await getCustomDomain(hostname); 143 + const customDomain = await getCustomDomain(hostnameWithoutPort); 177 144 if (!customDomain) { 178 145 return c.text('Custom domain not found or not verified', 404); 179 146 } ··· 185 152 const rkey = customDomain.rkey; 186 153 if (!isValidRkey(rkey)) { 187 154 return c.text('Invalid site configuration', 500); 188 - } 189 - 190 - // Check if site is currently being 
cached - return updating response early 191 - if (isSiteBeingCached(customDomain.did, rkey)) { 192 - return siteUpdatingResponse(); 193 - } 194 - 195 - const cached = await ensureSiteCached(customDomain.did, rkey); 196 - if (!cached) { 197 - return c.text('Site not found', 404); 198 155 } 199 156 200 157 const headers = extractHeaders(c.req.raw.headers); ··· 225 182 const query = c.req.query(); 226 183 const timeWindow = query.timeWindow ? parseInt(query.timeWindow as string) : 3600000; 227 184 const stats = metricsCollector.getStats('hosting-service', timeWindow); 228 - return c.json({ stats, timeWindow }); 185 + return c.json({ stats, revalidate: getRevalidateMetrics(), timeWindow }); 229 186 }); 230 187 231 188 app.get('/__internal__/observability/cache', async (c) => {
+4 -2
apps/main-app/src/lib/db.ts
··· 138 138 // Site settings cache table - cached place.wisp.settings records 139 139 await db` 140 140 CREATE TABLE IF NOT EXISTS site_settings_cache ( 141 - did TEXT PRIMARY KEY, 141 + did TEXT NOT NULL, 142 + rkey TEXT NOT NULL, 142 143 record_cid TEXT NOT NULL, 143 144 directory_listing BOOLEAN NOT NULL DEFAULT false, 144 145 spa_mode TEXT, ··· 147 148 clean_urls BOOLEAN NOT NULL DEFAULT true, 148 149 headers JSONB, 149 150 cached_at BIGINT DEFAULT EXTRACT(EPOCH FROM NOW()), 150 - updated_at BIGINT DEFAULT EXTRACT(EPOCH FROM NOW()) 151 + updated_at BIGINT DEFAULT EXTRACT(EPOCH FROM NOW()), 152 + PRIMARY KEY (did, rkey) 151 153 ) 152 154 `; 153 155
+20
bun.lock
··· 31 31 "@wispplace/safe-fetch": "workspace:*", 32 32 "@wispplace/tiered-storage": "workspace:*", 33 33 "hono": "^4.10.4", 34 + "ioredis": "^5.9.2", 34 35 "multiformats": "^13.4.1", 35 36 "postgres": "^3.4.5", 36 37 }, ··· 59 60 "@wispplace/safe-fetch": "workspace:*", 60 61 "@wispplace/tiered-storage": "workspace:*", 61 62 "hono": "^4.10.4", 63 + "ioredis": "^5.9.2", 62 64 "mime-types": "^2.1.35", 63 65 "multiformats": "^13.4.1", 64 66 "postgres": "^3.4.5", ··· 578 580 579 581 "@humanwhocodes/retry": ["@humanwhocodes/retry@0.4.3", "", {}, "sha512-bV0Tgo9K4hfPCek+aMAn81RppFKv2ySDQeMoSZuvTASywNTnVJCArCZE2FWqpvIatKu7VMRLWlR1EazvVhDyhQ=="], 580 582 583 + "@ioredis/commands": ["@ioredis/commands@1.5.0", "", {}, "sha512-eUgLqrMf8nJkZxT24JvVRrQya1vZkQh8BBeYNwGDqa5I0VUi8ACx7uFvAaLxintokpTenkK6DASvo/bvNbBGow=="], 584 + 581 585 "@ipld/dag-cbor": ["@ipld/dag-cbor@7.0.3", "", { "dependencies": { "cborg": "^1.6.0", "multiformats": "^9.5.4" } }, "sha512-1VVh2huHsuohdXC1bGJNE8WR72slZ9XE2T3wbBBq31dm7ZBatmKLLxrB+XAqafxfRFjv08RZmj/W/ZqaM13AuA=="], 582 586 583 587 "@jridgewell/gen-mapping": ["@jridgewell/gen-mapping@0.3.13", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.0", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA=="], ··· 1126 1130 1127 1131 "clsx": ["clsx@2.1.1", "", {}, "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA=="], 1128 1132 1133 + "cluster-key-slot": ["cluster-key-slot@1.1.2", "", {}, "sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA=="], 1134 + 1129 1135 "code-block-writer": ["code-block-writer@13.0.3", "", {}, "sha512-Oofo0pq3IKnsFtuHqSF7TqBfr71aeyZDVJ0HpmqB7FBM2qEigL0iPONSCZSO9pE9dZTAxANe5XHG9Uy0YMv8cg=="], 1130 1136 1131 1137 "color-convert": ["color-convert@2.0.1", "", { "dependencies": { "color-name": "~1.1.4" } }, 
"sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ=="], ··· 1159 1165 "default-browser-id": ["default-browser-id@5.0.1", "", {}, "sha512-x1VCxdX4t+8wVfd1so/9w+vQ4vx7lKd2Qp5tDRutErwmR85OgmfX7RlLRMWafRMY7hbEiXIbudNrjOAPa/hL8Q=="], 1160 1166 1161 1167 "define-lazy-prop": ["define-lazy-prop@3.0.0", "", {}, "sha512-N+MeXYoqr3pOgn8xfyRPREN7gHakLYjhsHhWGT3fWAiL4IkAt0iDw14QiiEm2bE30c5XX5q0FtAA3CK5f9/BUg=="], 1168 + 1169 + "denque": ["denque@2.1.0", "", {}, "sha512-HVQE3AAb/pxF8fQAoiqpvg9i3evqug3hoiwakOyZAwJm+6vZehbkYXZ0l4JxS+I3QxM97v5aaRNhj8v5oBhekw=="], 1162 1170 1163 1171 "depd": ["depd@2.0.0", "", {}, "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw=="], 1164 1172 ··· 1313 1321 "imurmurhash": ["imurmurhash@0.1.4", "", {}, "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA=="], 1314 1322 1315 1323 "inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="], 1324 + 1325 + "ioredis": ["ioredis@5.9.2", "", { "dependencies": { "@ioredis/commands": "1.5.0", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", "lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-tAAg/72/VxOUW7RQSX1pIxJVucYKcjFjfvj60L57jrZpYCHC3XN0WCQ3sNYL4Gmvv+7GPvTAjc+KSdeNuE8oWQ=="], 1316 1326 1317 1327 "ipaddr.js": ["ipaddr.js@1.9.1", "", {}, "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g=="], 1318 1328 ··· 1384 1394 1385 1395 "lodash.camelcase": ["lodash.camelcase@4.3.0", "", {}, "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA=="], 1386 1396 1397 + "lodash.defaults": ["lodash.defaults@4.2.0", "", {}, 
"sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ=="], 1398 + 1399 + "lodash.isarguments": ["lodash.isarguments@3.1.0", "", {}, "sha512-chi4NHZlZqZD18a0imDHnZPrDeBbTtVN7GXMwuGdRH9qotxAjYs3aVLKc7zNOG9eddR5Ksd8rvFEBc9SsggPpg=="], 1400 + 1387 1401 "lodash.merge": ["lodash.merge@4.6.2", "", {}, "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ=="], 1388 1402 1389 1403 "long": ["long@5.3.2", "", {}, "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA=="], ··· 1536 1550 1537 1551 "real-require": ["real-require@0.2.0", "", {}, "sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg=="], 1538 1552 1553 + "redis-errors": ["redis-errors@1.2.0", "", {}, "sha512-1qny3OExCf0UvUV/5wpYKf2YwPcOqXzkwKKSmKHiE6ZMQs5heeE/c8eXK+PNllPvmjgAbfnsbpkGZWy8cBpn9w=="], 1554 + 1555 + "redis-parser": ["redis-parser@3.0.0", "", { "dependencies": { "redis-errors": "^1.0.0" } }, "sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A=="], 1556 + 1539 1557 "require-directory": ["require-directory@2.1.1", "", {}, "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q=="], 1540 1558 1541 1559 "require-in-the-middle": ["require-in-the-middle@7.5.2", "", { "dependencies": { "debug": "^4.3.5", "module-details-from-path": "^1.0.3", "resolve": "^1.22.8" } }, "sha512-gAZ+kLqBdHarXB64XpAe2VCjB7rIRv+mU8tfRWziHRJ5umKsIHN2tLLv6EtMw7WCdP19S0ERVMldNvxYCHnhSQ=="], ··· 1593 1611 "split2": ["split2@4.2.0", "", {}, "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg=="], 1594 1612 1595 1613 "stackback": ["stackback@0.0.2", "", {}, "sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw=="], 1614 + 1615 + "standard-as-callback": ["standard-as-callback@2.1.0", "", {}, 
"sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A=="], 1596 1616 1597 1617 "statuses": ["statuses@2.0.2", "", {}, "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw=="], 1598 1618
+1
packages/@wispplace/database/src/types.ts
··· 73 73 */ 74 74 export interface SiteSettingsCache { 75 75 did: string; 76 + rkey: string; 76 77 record_cid: string; 77 78 directory_listing: boolean; 78 79 spa_mode: string | null;
+62
packages/@wispplace/fs-utils/src/file-cids.ts
··· 1 + export type FileCidsNormalizationSource = 2 + | 'object' 3 + | 'array' 4 + | 'string-json' 5 + | 'string-invalid' 6 + | 'null' 7 + | 'other'; 8 + 9 + export type FileCidsNormalization = { 10 + value: Record<string, string>; 11 + source: FileCidsNormalizationSource; 12 + }; 13 + 14 + export function normalizeFileCids(value: unknown): FileCidsNormalization { 15 + if (value == null) { 16 + return { value: {}, source: 'null' }; 17 + } 18 + 19 + if (typeof value === 'string') { 20 + try { 21 + const parsed = JSON.parse(value) as unknown; 22 + if (Array.isArray(parsed)) { 23 + const normalized = normalizeFileCids(parsed); 24 + return { value: normalized.value, source: 'string-json' }; 25 + } 26 + if (parsed && typeof parsed === 'object') { 27 + return { value: parsed as Record<string, string>, source: 'string-json' }; 28 + } 29 + } catch { 30 + // fall through to invalid 31 + } 32 + return { value: {}, source: 'string-invalid' }; 33 + } 34 + 35 + if (Array.isArray(value)) { 36 + const result: Record<string, string> = {}; 37 + for (const item of value) { 38 + if (Array.isArray(item) && item.length >= 2) { 39 + const [path, cid] = item; 40 + if (typeof path === 'string' && typeof cid === 'string') { 41 + result[path] = cid; 42 + } 43 + continue; 44 + } 45 + 46 + if (item && typeof item === 'object' && 'path' in item && 'cid' in item) { 47 + const path = (item as any).path; 48 + const cid = (item as any).cid; 49 + if (typeof path === 'string' && typeof cid === 'string') { 50 + result[path] = cid; 51 + } 52 + } 53 + } 54 + return { value: result, source: 'array' }; 55 + } 56 + 57 + if (typeof value === 'object') { 58 + return { value: value as Record<string, string>, source: 'object' }; 59 + } 60 + 61 + return { value: {}, source: 'other' }; 62 + }
+4
packages/@wispplace/fs-utils/src/index.ts
··· 5 5 export type { UploadedFile, FileUploadResult, ProcessedDirectory, ProcessUploadedFilesOptions, UpdateFileBlobsOptions } from './tree'; 6 6 export { processUploadedFiles, updateFileBlobs, countFilesInDirectory, collectFileCidsFromEntries } from './tree'; 7 7 8 + // File CID normalization 9 + export type { FileCidsNormalization, FileCidsNormalizationSource } from './file-cids'; 10 + export { normalizeFileCids } from './file-cids'; 11 + 8 12 // Manifest creation 9 13 export { createManifest } from './manifest'; 10 14