Monorepo for wisp.place — a static site hosting service built on top of the AT Protocol.

Simplify cache by removing metadata bucket and fixing cache bugs

+69 -321
-1
apps/firehose-service/.env.example
··· 10 10 11 11 # S3 Storage (leave empty for local disk fallback) 12 12 S3_BUCKET= 13 - S3_METADATA_BUCKET= 14 13 S3_REGION=auto 15 14 S3_ENDPOINT= 16 15 S3_PREFIX=sites/
-1
apps/firehose-service/src/config.ts
··· 12 12 13 13 // S3 storage (write destination) 14 14 s3Bucket: process.env.S3_BUCKET || '', 15 - s3MetadataBucket: process.env.S3_METADATA_BUCKET, 16 15 s3Region: process.env.S3_REGION || 'us-east-1', 17 16 s3Endpoint: process.env.S3_ENDPOINT, 18 17 s3ForcePathStyle: process.env.S3_FORCE_PATH_STYLE !== 'false',
-1
apps/firehose-service/src/lib/storage.ts
··· 29 29 : undefined, 30 30 prefix: config.s3Prefix, 31 31 forcePathStyle: config.s3ForcePathStyle, 32 - metadataBucket: config.s3MetadataBucket, 33 32 }); 34 33 logger.info('[Storage] Using S3 cold tier:', { bucket: config.s3Bucket }); 35 34 } else {
-1
apps/hosting-service/.env.example
··· 11 11 12 12 # S3 Storage (leave empty for local disk fallback) 13 13 S3_BUCKET= 14 - S3_METADATA_BUCKET= 15 14 S3_REGION=auto 16 15 S3_ENDPOINT= 17 16 S3_PREFIX=sites/
-1
apps/hosting-service/src/index.ts
··· 79 79 S3 Region: ${storageConfig.s3Region} 80 80 S3 Endpoint: ${storageConfig.s3Endpoint} 81 81 S3 Prefix: ${storageConfig.s3Prefix} 82 - Metadata Bucket: ${storageConfig.metadataBucket} 83 82 84 83 Firehose: DISABLED (read-only) 85 84 `);
+1 -1
apps/hosting-service/src/lib/on-demand-cache.ts
··· 156 156 await upsertSiteCache(did, rkey, recordCid, fileCids); 157 157 158 158 // Enqueue revalidate so firehose-service backfills S3 (cold tier) 159 - await enqueueRevalidate(did, rkey, `on-demand-cache`); 159 + await enqueueRevalidate(did, rkey, `storage-miss:on-demand`); 160 160 161 161 logger.info('Successfully cached site', { did, rkey, downloaded }); 162 162 return downloaded > 0;
+6 -1
apps/hosting-service/src/lib/revalidate-queue.ts
··· 50 50 } 51 51 52 52 try { 53 - const dedupeKey = `revalidate:site:${did}:${rkey}`; 53 + // Separate dedup keys per reason category so a storage-miss is never 54 + // silenced by a pending rewrite-miss (which runs with forceDownload=false) 55 + const reasonCategory = reason.startsWith('storage-miss') ? 'storage-miss' 56 + : reason.startsWith('rewrite-miss') ? 'rewrite-miss' 57 + : 'other'; 58 + const dedupeKey = `revalidate:site:${reasonCategory}:${did}:${rkey}`; 54 59 const set = await redis.set(dedupeKey, '1', 'EX', dedupeTtlSeconds, 'NX'); 55 60 if (!set) { 56 61 recordRevalidateResult('deduped');
-3
apps/hosting-service/src/lib/storage.ts
··· 27 27 28 28 // S3/Cold tier configuration (optional) 29 29 const S3_BUCKET = process.env.S3_BUCKET || ''; 30 - const S3_METADATA_BUCKET = process.env.S3_METADATA_BUCKET; 31 30 const S3_REGION = process.env.S3_REGION || 'us-east-1'; 32 31 const S3_ENDPOINT = process.env.S3_ENDPOINT; 33 32 const S3_FORCE_PATH_STYLE = process.env.S3_FORCE_PATH_STYLE !== 'false'; ··· 280 279 // Full three-tier setup with S3 as cold storage 281 280 const s3Tier = new S3StorageTier({ 282 281 bucket: S3_BUCKET, 283 - metadataBucket: S3_METADATA_BUCKET, 284 282 region: S3_REGION, 285 283 endpoint: S3_ENDPOINT, 286 284 forcePathStyle: S3_FORCE_PATH_STYLE, ··· 400 398 s3Region: S3_REGION, 401 399 s3Endpoint: S3_ENDPOINT || '(default AWS S3)', 402 400 s3Prefix: S3_PREFIX, 403 - metadataBucket: S3_METADATA_BUCKET || '(embedded in data bucket)', 404 401 }; 405 402 }
+62 -311
packages/@wispplace/tiered-storage/src/tiers/S3StorageTier.ts
··· 78 78 */ 79 79 forcePathStyle?: boolean; 80 80 81 - /** 82 - * Optional separate bucket for storing metadata. 83 - * 84 - * @remarks 85 - * **RECOMMENDED for production use!** 86 - * 87 - * By default, metadata is stored in S3 object metadata fields. However, updating 88 - * metadata requires copying the entire object, which is slow and expensive for large files. 89 - * 90 - * When `metadataBucket` is specified, metadata is stored as separate JSON objects 91 - * in this bucket. This allows fast, cheap metadata updates without copying data. 92 - * 93 - * **Benefits:** 94 - * - Fast metadata updates (no object copying) 95 - * - Much cheaper for large objects 96 - * - No impact on data object performance 97 - * 98 - * **Trade-offs:** 99 - * - Requires managing two buckets 100 - * - Metadata and data could become out of sync if not handled carefully 101 - * - Additional S3 API calls for metadata operations 102 - * 103 - * @example 104 - * ```typescript 105 - * const tier = new S3StorageTier({ 106 - * bucket: 'my-data-bucket', 107 - * metadataBucket: 'my-metadata-bucket', // Separate bucket for metadata 108 - * region: 'us-east-1', 109 - * }); 110 - * ``` 111 - */ 112 - metadataBucket?: string; 81 + 113 82 } 114 83 115 84 /** 116 85 * AWS S3 (or compatible) storage tier. 
117 86 * 118 87 * @remarks 119 - * - Supports AWS S3, Cloudflare R2, MinIO, and other S3-compatible services 120 - * - Uses object metadata for StorageMetadata 88 + * - Supports AWS S3, Cloudflare R2, MinIO, Hetzner Object Storage, and other S3-compatible services 89 + * - Metadata is stored inline as S3 object metadata headers (x-amz-meta-*) 90 + * - Single request per read/write — no separate metadata objects 121 91 * - Requires `@aws-sdk/client-s3` peer dependency 122 92 * - Typically used as the cold tier (source of truth) 123 - * 124 - * **Metadata Storage:** 125 - * Metadata is stored in S3 object metadata fields: 126 - * - Custom metadata fields are prefixed with `x-amz-meta-` 127 - * - Built-in fields use standard S3 headers 128 93 * 129 94 * @example 130 95 * ```typescript ··· 155 120 export class S3StorageTier implements StorageTier { 156 121 private client: S3Client; 157 122 private prefix: string; 158 - private metadataBucket?: string; 159 123 160 124 constructor(private config: S3StorageTierConfig) { 161 125 const clientConfig: S3ClientConfig = { ··· 168 132 169 133 this.client = new S3Client(clientConfig); 170 134 this.prefix = config.prefix ?? ''; 171 - if (config.metadataBucket) { 172 - this.metadataBucket = config.metadataBucket; 173 - } 174 135 } 175 136 176 137 async get(key: string): Promise<Uint8Array | null> { ··· 202 163 * @returns The data and metadata, or null if not found 203 164 * 204 165 * @remarks 205 - * When using a separate metadata bucket, fetches data and metadata in parallel. 206 - * Otherwise, uses the data object's embedded metadata. 166 + * Metadata is read from S3 object metadata headers (x-amz-meta-*), 167 + * returned in a single request alongside the file body. 
207 168 */ 208 169 async getWithMetadata(key: string): Promise<TierGetResult | null> { 209 170 const s3Key = this.getS3Key(key); 210 171 211 172 try { 212 - if (this.metadataBucket) { 213 - // Fetch data and metadata in parallel 214 - const [dataResponse, metadataResponse] = await Promise.all([ 215 - this.client.send( 216 - new GetObjectCommand({ 217 - Bucket: this.config.bucket, 218 - Key: s3Key, 219 - }), 220 - ), 221 - this.client.send( 222 - new GetObjectCommand({ 223 - Bucket: this.metadataBucket, 224 - Key: s3Key + '.meta', 225 - }), 226 - ), 227 - ]); 173 + const response = await this.client.send( 174 + new GetObjectCommand({ 175 + Bucket: this.config.bucket, 176 + Key: s3Key, 177 + }), 178 + ); 228 179 229 - if (!dataResponse.Body || !metadataResponse.Body) { 230 - return null; 231 - } 180 + if (!response.Body || !response.Metadata) { 181 + return null; 182 + } 232 183 233 - const [data, metaBuffer] = await Promise.all([ 234 - this.streamToUint8Array(dataResponse.Body as Readable), 235 - this.streamToUint8Array(metadataResponse.Body as Readable), 236 - ]); 184 + const data = await this.streamToUint8Array(response.Body as Readable); 185 + const metadata = this.s3ToMetadata(response.Metadata); 237 186 238 - const json = new TextDecoder().decode(metaBuffer); 239 - let metadata: StorageMetadata; 240 - try { 241 - metadata = JSON.parse(json) as StorageMetadata; 242 - } catch { 243 - // Corrupted or partial .meta file — return null so the caller 244 - // falls through to on-demand fetch rather than serving bad data. 
245 - return null; 246 - } 247 - metadata.createdAt = new Date(metadata.createdAt); 248 - metadata.lastAccessed = new Date(metadata.lastAccessed); 249 - if (metadata.ttl) { 250 - metadata.ttl = new Date(metadata.ttl); 251 - } 252 - 253 - return { data, metadata }; 254 - } else { 255 - // Get data with embedded metadata from response headers 256 - const response = await this.client.send( 257 - new GetObjectCommand({ 258 - Bucket: this.config.bucket, 259 - Key: s3Key, 260 - }), 261 - ); 262 - 263 - if (!response.Body || !response.Metadata) { 264 - return null; 265 - } 266 - 267 - const data = await this.streamToUint8Array(response.Body as Readable); 268 - const metadata = this.s3ToMetadata(response.Metadata); 269 - 270 - return { data, metadata }; 271 - } 187 + return { data, metadata }; 272 188 } catch (error) { 273 189 if (this.isNoSuchKeyError(error)) { 274 190 return null; ··· 291 207 const s3Key = this.getS3Key(key); 292 208 293 209 try { 294 - if (this.metadataBucket) { 295 - // Fetch data stream and metadata in parallel 296 - const [dataResponse, metadataResponse] = await Promise.all([ 297 - this.client.send( 298 - new GetObjectCommand({ 299 - Bucket: this.config.bucket, 300 - Key: s3Key, 301 - }), 302 - ), 303 - this.client.send( 304 - new GetObjectCommand({ 305 - Bucket: this.metadataBucket, 306 - Key: s3Key + '.meta', 307 - }), 308 - ), 309 - ]); 210 + const response = await this.client.send( 211 + new GetObjectCommand({ 212 + Bucket: this.config.bucket, 213 + Key: s3Key, 214 + }), 215 + ); 310 216 311 - if (!dataResponse.Body || !metadataResponse.Body) { 312 - return null; 313 - } 314 - 315 - // Only buffer the small metadata, stream the data 316 - const metaBuffer = await this.streamToUint8Array(metadataResponse.Body as Readable); 317 - const json = new TextDecoder().decode(metaBuffer); 318 - const metadata = JSON.parse(json) as StorageMetadata; 319 - metadata.createdAt = new Date(metadata.createdAt); 320 - metadata.lastAccessed = new 
Date(metadata.lastAccessed); 321 - if (metadata.ttl) { 322 - metadata.ttl = new Date(metadata.ttl); 323 - } 324 - 325 - return { stream: dataResponse.Body as Readable, metadata }; 326 - } else { 327 - // Get data stream with embedded metadata from response headers 328 - const response = await this.client.send( 329 - new GetObjectCommand({ 330 - Bucket: this.config.bucket, 331 - Key: s3Key, 332 - }), 333 - ); 217 + if (!response.Body || !response.Metadata) { 218 + return null; 219 + } 334 220 335 - if (!response.Body || !response.Metadata) { 336 - return null; 337 - } 338 - 339 - const metadata = this.s3ToMetadata(response.Metadata); 221 + const metadata = this.s3ToMetadata(response.Metadata); 340 222 341 - return { stream: response.Body as Readable, metadata }; 342 - } 223 + return { stream: response.Body as Readable, metadata }; 343 224 } catch (error) { 344 225 if (this.isNoSuchKeyError(error)) { 345 226 return null; ··· 364 245 stream: NodeJS.ReadableStream, 365 246 metadata: StorageMetadata, 366 247 ): Promise<void> { 367 - const s3Key = this.getS3Key(key); 248 + const upload = new Upload({ 249 + client: this.client, 250 + params: { 251 + Bucket: this.config.bucket, 252 + Key: this.getS3Key(key), 253 + Body: stream as Readable, 254 + Metadata: this.metadataToS3(metadata), 255 + }, 256 + }); 368 257 369 - if (this.metadataBucket) { 370 - // Use multipart upload for streaming data 371 - const upload = new Upload({ 372 - client: this.client, 373 - params: { 374 - Bucket: this.config.bucket, 375 - Key: s3Key, 376 - Body: stream as Readable, 377 - }, 378 - }); 379 - 380 - const metadataJson = JSON.stringify(metadata); 381 - const metadataBuffer = new TextEncoder().encode(metadataJson); 382 - const metadataCommand = new PutObjectCommand({ 383 - Bucket: this.metadataBucket, 384 - Key: s3Key + '.meta', 385 - Body: metadataBuffer, 386 - ContentType: 'application/json', 387 - }); 388 - 389 - await Promise.all([upload.done(), this.client.send(metadataCommand)]); 390 - } 
else { 391 - // Use multipart upload with embedded metadata 392 - const upload = new Upload({ 393 - client: this.client, 394 - params: { 395 - Bucket: this.config.bucket, 396 - Key: s3Key, 397 - Body: stream as Readable, 398 - Metadata: this.metadataToS3(metadata), 399 - }, 400 - }); 401 - 402 - await upload.done(); 403 - } 258 + await upload.done(); 404 259 } 405 260 406 261 private async streamToUint8Array(stream: Readable): Promise<Uint8Array> { ··· 437 292 } 438 293 439 294 async set(key: string, data: Uint8Array, metadata: StorageMetadata): Promise<void> { 440 - const s3Key = this.getS3Key(key); 441 - 442 - if (this.metadataBucket) { 443 - const dataCommand = new PutObjectCommand({ 295 + await this.client.send( 296 + new PutObjectCommand({ 444 297 Bucket: this.config.bucket, 445 - Key: s3Key, 446 - Body: data, 447 - ContentLength: data.byteLength, 448 - }); 449 - 450 - const metadataJson = JSON.stringify(metadata); 451 - const metadataBuffer = new TextEncoder().encode(metadataJson); 452 - const metadataCommand = new PutObjectCommand({ 453 - Bucket: this.metadataBucket, 454 - Key: s3Key + '.meta', 455 - Body: metadataBuffer, 456 - ContentType: 'application/json', 457 - }); 458 - 459 - await Promise.all([this.client.send(dataCommand), this.client.send(metadataCommand)]); 460 - } else { 461 - const command = new PutObjectCommand({ 462 - Bucket: this.config.bucket, 463 - Key: s3Key, 298 + Key: this.getS3Key(key), 464 299 Body: data, 465 300 ContentLength: data.byteLength, 466 301 Metadata: this.metadataToS3(metadata), 467 - }); 468 - 469 - await this.client.send(command); 470 - } 302 + }), 303 + ); 471 304 } 472 305 473 306 async delete(key: string): Promise<void> { 474 - const s3Key = this.getS3Key(key); 475 - 476 307 try { 477 - const dataCommand = new DeleteObjectCommand({ 478 - Bucket: this.config.bucket, 479 - Key: s3Key, 480 - }); 481 - 482 - if (this.metadataBucket) { 483 - const metadataCommand = new DeleteObjectCommand({ 484 - Bucket: this.metadataBucket, 
485 - Key: s3Key + '.meta', 486 - }); 487 - 488 - await Promise.all([ 489 - this.client.send(dataCommand), 490 - this.client.send(metadataCommand).catch((error) => { 491 - if (!this.isNoSuchKeyError(error)) throw error; 492 - }), 493 - ]); 494 - } else { 495 - await this.client.send(dataCommand); 496 - } 308 + await this.client.send( 309 + new DeleteObjectCommand({ 310 + Bucket: this.config.bucket, 311 + Key: this.getS3Key(key), 312 + }), 313 + ); 497 314 } catch (error) { 498 315 if (!this.isNoSuchKeyError(error)) { 499 316 throw error; ··· 553 370 for (let i = 0; i < keys.length; i += batchSize) { 554 371 const batch = keys.slice(i, i + batchSize); 555 372 556 - const dataCommand = new DeleteObjectsCommand({ 557 - Bucket: this.config.bucket, 558 - Delete: { 559 - Objects: batch.map((key) => ({ Key: this.getS3Key(key) })), 560 - }, 561 - }); 562 - 563 - if (this.metadataBucket) { 564 - const metadataCommand = new DeleteObjectsCommand({ 565 - Bucket: this.metadataBucket, 373 + await this.client.send( 374 + new DeleteObjectsCommand({ 375 + Bucket: this.config.bucket, 566 376 Delete: { 567 - Objects: batch.map((key) => ({ Key: this.getS3Key(key) + '.meta' })), 377 + Objects: batch.map((key) => ({ Key: this.getS3Key(key) })), 568 378 }, 569 - }); 570 - 571 - await Promise.all([ 572 - this.client.send(dataCommand), 573 - this.client.send(metadataCommand).catch(() => {}), 574 - ]); 575 - } else { 576 - await this.client.send(dataCommand); 577 - } 379 + }), 380 + ); 578 381 } 579 382 } 580 383 581 384 async getMetadata(key: string): Promise<StorageMetadata | null> { 582 - if (this.metadataBucket) { 583 - try { 584 - const command = new GetObjectCommand({ 585 - Bucket: this.metadataBucket, 586 - Key: this.getS3Key(key) + '.meta', 587 - }); 588 - 589 - const response = await this.client.send(command); 590 - 591 - if (!response.Body) { 592 - return null; 593 - } 594 - 595 - const buffer = await this.streamToUint8Array(response.Body as Readable); 596 - const json = new 
TextDecoder().decode(buffer); 597 - let metadata: StorageMetadata; 598 - try { 599 - metadata = JSON.parse(json) as StorageMetadata; 600 - } catch { 601 - return null; 602 - } 603 - 604 - metadata.createdAt = new Date(metadata.createdAt); 605 - metadata.lastAccessed = new Date(metadata.lastAccessed); 606 - if (metadata.ttl) { 607 - metadata.ttl = new Date(metadata.ttl); 608 - } 609 - 610 - return metadata; 611 - } catch (error) { 612 - if (this.isNoSuchKeyError(error)) { 613 - return null; 614 - } 615 - throw error; 616 - } 617 - } 618 - 619 385 try { 620 - const command = new HeadObjectCommand({ 621 - Bucket: this.config.bucket, 622 - Key: this.getS3Key(key), 623 - }); 624 - 625 - const response = await this.client.send(command); 386 + const response = await this.client.send( 387 + new HeadObjectCommand({ 388 + Bucket: this.config.bucket, 389 + Key: this.getS3Key(key), 390 + }), 391 + ); 626 392 627 393 if (!response.Metadata) { 628 394 return null; ··· 638 404 } 639 405 640 406 async setMetadata(key: string, metadata: StorageMetadata): Promise<void> { 641 - if (this.metadataBucket) { 642 - const metadataJson = JSON.stringify(metadata); 643 - const buffer = new TextEncoder().encode(metadataJson); 644 - 645 - const command = new PutObjectCommand({ 646 - Bucket: this.metadataBucket, 647 - Key: this.getS3Key(key) + '.meta', 648 - Body: buffer, 649 - ContentType: 'application/json', 650 - }); 651 - 652 - await this.client.send(command); 653 - return; 654 - } 655 - 656 407 const s3Key = this.getS3Key(key); 657 408 const command = new CopyObjectCommand({ 658 409 Bucket: this.config.bucket,