Monorepo for wisp.place. A static site hosting service built on top of the AT Protocol. wisp.place

rudimentary _redirects support, incremental uploading for cli #3

Closed · opened by nekomimi.pet, targeting main

TODO: `_headers` file; `place.wisp.settings` lexicon as a lexicon-based way of configuring this

Labels

None yet.

Assignee

None yet.

Participants 1
AT URI
at://did:plc:ttdrpj45ibqunmfhdsb4zdwq/sh.tangled.repo.pull/3m5htcylipq22
+298 -46
Diff #1
+1
cli/.gitignore
··· 1 + test/ 1 2 .DS_STORE 2 3 jacquard/ 3 4 binaries/
+3
cli/Cargo.lock
··· 4385 4385 "jacquard-oauth", 4386 4386 "miette", 4387 4387 "mime_guess", 4388 + "multibase", 4389 + "multihash", 4388 4390 "reqwest", 4389 4391 "rustversion", 4390 4392 "serde", 4391 4393 "serde_json", 4394 + "sha2", 4392 4395 "shellexpand", 4393 4396 "tokio", 4394 4397 "walkdir",
+3
cli/Cargo.toml
··· 30 30 mime_guess = "2.0" 31 31 bytes = "1.10" 32 32 futures = "0.3.31" 33 + multihash = "0.19.3" 34 + multibase = "0.9" 35 + sha2 = "0.10"
+85
cli/src/blob_map.rs
··· 1 + use jacquard_common::types::blob::BlobRef; 2 + use jacquard_common::IntoStatic; 3 + use std::collections::HashMap; 4 + 5 + use crate::place_wisp::fs::{Directory, EntryNode}; 6 + 7 + /// Extract blob information from a directory tree 8 + /// Returns a map of file paths to their blob refs and CIDs 9 + /// 10 + /// This mirrors the TypeScript implementation in src/lib/wisp-utils.ts lines 275-302 11 + pub fn extract_blob_map( 12 + directory: &Directory, 13 + ) -> HashMap<String, (BlobRef<'static>, String)> { 14 + extract_blob_map_recursive(directory, String::new()) 15 + } 16 + 17 + fn extract_blob_map_recursive( 18 + directory: &Directory, 19 + current_path: String, 20 + ) -> HashMap<String, (BlobRef<'static>, String)> { 21 + let mut blob_map = HashMap::new(); 22 + 23 + for entry in &directory.entries { 24 + let full_path = if current_path.is_empty() { 25 + entry.name.to_string() 26 + } else { 27 + format!("{}/{}", current_path, entry.name) 28 + }; 29 + 30 + match &entry.node { 31 + EntryNode::File(file_node) => { 32 + // Extract CID from blob ref 33 + // BlobRef is an enum with Blob variant, which has a ref field (CidLink) 34 + let blob_ref = &file_node.blob; 35 + let cid_string = blob_ref.blob().r#ref.to_string(); 36 + 37 + // Store with full path (mirrors TypeScript implementation) 38 + blob_map.insert( 39 + full_path, 40 + (blob_ref.clone().into_static(), cid_string) 41 + ); 42 + } 43 + EntryNode::Directory(subdir) => { 44 + let sub_map = extract_blob_map_recursive(subdir, full_path); 45 + blob_map.extend(sub_map); 46 + } 47 + EntryNode::Unknown(_) => { 48 + // Skip unknown node types 49 + } 50 + } 51 + } 52 + 53 + blob_map 54 + } 55 + 56 + /// Normalize file path by removing base folder prefix 57 + /// Example: "cobblemon/index.html" -> "index.html" 58 + /// 59 + /// Note: This function is kept for reference but is no longer used in production code. 
60 + /// The TypeScript server has a similar normalization (src/routes/wisp.ts line 291) to handle 61 + /// uploads that include a base folder prefix, but our CLI doesn't need this since we 62 + /// track full paths consistently. 63 + #[allow(dead_code)] 64 + pub fn normalize_path(path: &str) -> String { 65 + // Remove base folder prefix (everything before first /) 66 + if let Some(idx) = path.find('/') { 67 + path[idx + 1..].to_string() 68 + } else { 69 + path.to_string() 70 + } 71 + } 72 + 73 + #[cfg(test)] 74 + mod tests { 75 + use super::*; 76 + 77 + #[test] 78 + fn test_normalize_path() { 79 + assert_eq!(normalize_path("index.html"), "index.html"); 80 + assert_eq!(normalize_path("cobblemon/index.html"), "index.html"); 81 + assert_eq!(normalize_path("folder/subfolder/file.txt"), "subfolder/file.txt"); 82 + assert_eq!(normalize_path("a/b/c/d.txt"), "b/c/d.txt"); 83 + } 84 + } 85 +
+66
cli/src/cid.rs
··· 1 + use jacquard_common::types::cid::IpldCid; 2 + use sha2::{Digest, Sha256}; 3 + 4 + /// Compute CID (Content Identifier) for blob content 5 + /// Uses the same algorithm as AT Protocol: CIDv1 with raw codec (0x55) and SHA-256 6 + /// 7 + /// CRITICAL: This must be called on BASE64-ENCODED GZIPPED content, not just gzipped content 8 + /// 9 + /// Based on @atproto/common/src/ipld.ts sha256RawToCid implementation 10 + pub fn compute_cid(content: &[u8]) -> String { 11 + // Use node crypto to compute sha256 hash (same as AT Protocol) 12 + let hash = Sha256::digest(content); 13 + 14 + // Create multihash (code 0x12 = sha2-256) 15 + let multihash = multihash::Multihash::wrap(0x12, &hash) 16 + .expect("SHA-256 hash should always fit in multihash"); 17 + 18 + // Create CIDv1 with raw codec (0x55) 19 + let cid = IpldCid::new_v1(0x55, multihash); 20 + 21 + // Convert to base32 string representation 22 + cid.to_string_of_base(multibase::Base::Base32Lower) 23 + .unwrap_or_else(|_| cid.to_string()) 24 + } 25 + 26 + #[cfg(test)] 27 + mod tests { 28 + use super::*; 29 + use base64::Engine; 30 + 31 + #[test] 32 + fn test_compute_cid() { 33 + // Test with a simple string: "hello" 34 + let content = b"hello"; 35 + let cid = compute_cid(content); 36 + 37 + // CID should start with 'baf' for raw codec base32 38 + assert!(cid.starts_with("baf")); 39 + } 40 + 41 + #[test] 42 + fn test_compute_cid_base64_encoded() { 43 + // Simulate the actual use case: gzipped then base64 encoded 44 + use flate2::write::GzEncoder; 45 + use flate2::Compression; 46 + use std::io::Write; 47 + 48 + let original = b"hello world"; 49 + 50 + // Gzip compress 51 + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); 52 + encoder.write_all(original).unwrap(); 53 + let gzipped = encoder.finish().unwrap(); 54 + 55 + // Base64 encode the gzipped data 56 + let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes(); 57 + 58 + // Compute CID on the base64 bytes 59 + let 
cid = compute_cid(&base64_bytes); 60 + 61 + // Should be a valid CID 62 + assert!(cid.starts_with("baf")); 63 + assert!(cid.len() > 10); 64 + } 65 + } 66 +
+140 -46
cli/src/main.rs
··· 1 1 mod builder_types; 2 2 mod place_wisp; 3 + mod cid; 4 + mod blob_map; 3 5 4 6 use clap::Parser; 5 7 use jacquard::CowStr; 6 - use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession}; 8 + use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession, AgentSession}; 7 9 use jacquard::oauth::client::OAuthClient; 8 10 use jacquard::oauth::loopback::LoopbackConfig; 9 11 use jacquard::prelude::IdentityResolver; ··· 11 13 use jacquard_common::types::blob::MimeType; 12 14 use miette::IntoDiagnostic; 13 15 use std::path::{Path, PathBuf}; 16 + use std::collections::HashMap; 14 17 use flate2::Compression; 15 18 use flate2::write::GzEncoder; 16 19 use std::io::Write; ··· 107 110 108 111 println!("Deploying site '{}'...", site_name); 109 112 113 + // Try to fetch existing manifest for incremental updates 114 + let existing_blob_map: HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)> = { 115 + use jacquard_common::types::string::AtUri; 116 + 117 + // Get the DID for this session 118 + let session_info = agent.session_info().await; 119 + if let Some((did, _)) = session_info { 120 + // Construct the AT URI for the record 121 + let uri_string = format!("at://{}/place.wisp.fs/{}", did, site_name); 122 + if let Ok(uri) = AtUri::new(&uri_string) { 123 + match agent.get_record::<Fs>(&uri).await { 124 + Ok(response) => { 125 + match response.into_output() { 126 + Ok(record_output) => { 127 + let existing_manifest = record_output.value; 128 + let blob_map = blob_map::extract_blob_map(&existing_manifest.root); 129 + println!("Found existing manifest with {} files, checking for changes...", blob_map.len()); 130 + blob_map 131 + } 132 + Err(_) => { 133 + println!("No existing manifest found, uploading all files..."); 134 + HashMap::new() 135 + } 136 + } 137 + } 138 + Err(_) => { 139 + // Record doesn't exist yet - this is a new site 140 + println!("No existing manifest found, uploading all files..."); 141 
+ HashMap::new() 142 + } 143 + } 144 + } else { 145 + println!("No existing manifest found (invalid URI), uploading all files..."); 146 + HashMap::new() 147 + } 148 + } else { 149 + println!("No existing manifest found (could not get DID), uploading all files..."); 150 + HashMap::new() 151 + } 152 + }; 153 + 110 154 // Build directory tree 111 - let root_dir = build_directory(agent, &path).await?; 155 + let (root_dir, total_files, reused_count) = build_directory(agent, &path, &existing_blob_map, String::new()).await?; 156 + let uploaded_count = total_files - reused_count; 112 157 113 - // Count total files 114 - let file_count = count_files(&root_dir); 115 - 116 158 // Create the Fs record 117 159 let fs_record = Fs::new() 118 160 .site(CowStr::from(site_name.clone())) 119 161 .root(root_dir) 120 - .file_count(file_count as i64) 162 + .file_count(total_files as i64) 121 163 .created_at(Datetime::now()) 122 164 .build(); 123 165 ··· 132 174 .and_then(|s| s.split('/').next()) 133 175 .ok_or_else(|| miette::miette!("Failed to parse DID from URI"))?; 134 176 135 - println!("Deployed site '{}': {}", site_name, output.uri); 136 - println!("Available at: https://sites.wisp.place/{}/{}", did, site_name); 177 + println!("\n✓ Deployed site '{}': {}", site_name, output.uri); 178 + println!(" Total files: {} ({} reused, {} uploaded)", total_files, reused_count, uploaded_count); 179 + println!(" Available at: https://sites.wisp.place/{}/{}", did, site_name); 137 180 138 181 Ok(()) 139 182 } 140 183 141 - 184 + /// Recursively build a Directory from a filesystem path 185 + /// current_path is the path from the root of the site (e.g., "" for root, "config" for config dir) 142 186 fn build_directory<'a>( 143 187 agent: &'a Agent<impl jacquard::client::AgentSession + IdentityResolver + 'a>, 144 188 dir_path: &'a Path, 145 - ) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<Directory<'static>>> + 'a>> 189 + existing_blobs: &'a HashMap<String, 
(jacquard_common::types::blob::BlobRef<'static>, String)>, 190 + current_path: String, 191 + ) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<(Directory<'static>, usize, usize)>> + 'a>> 146 192 { 147 193 Box::pin(async move { 148 194 // Collect all directory entries first ··· 167 213 168 214 169 215 216 + let metadata = entry.metadata().into_diagnostic()?; 170 217 171 - 172 - 173 - 174 - 175 - 176 - 218 + if metadata.is_file() { 219 + // Construct full path for this file (for blob map lookup) 220 + let full_path = if current_path.is_empty() { 221 + name_str.clone() 222 + } else { 223 + format!("{}/{}", current_path, name_str) 224 + }; 225 + file_tasks.push((name_str, path, full_path)); 226 + } else if metadata.is_dir() { 227 + dir_tasks.push((name_str, path)); 228 + } 177 229 } 178 230 179 231 // Process files concurrently with a limit of 5 180 - let file_entries: Vec<Entry> = stream::iter(file_tasks) 181 - .map(|(name, path)| async move { 182 - let file_node = process_file(agent, &path).await?; 183 - Ok::<_, miette::Report>(Entry::new() 232 + let file_results: Vec<(Entry<'static>, bool)> = stream::iter(file_tasks) 233 + .map(|(name, path, full_path)| async move { 234 + let (file_node, reused) = process_file(agent, &path, &full_path, existing_blobs).await?; 235 + let entry = Entry::new() 184 236 .name(CowStr::from(name)) 185 237 .node(EntryNode::File(Box::new(file_node))) 186 - .build()) 238 + .build(); 239 + Ok::<_, miette::Report>((entry, reused)) 187 240 }) 188 241 .buffer_unordered(5) 189 242 .collect::<Vec<_>>() 190 243 .await 191 244 .into_iter() 192 245 .collect::<miette::Result<Vec<_>>>()?; 246 + 247 + let mut file_entries = Vec::new(); 248 + let mut reused_count = 0; 249 + let mut total_files = 0; 250 + 251 + for (entry, reused) in file_results { 252 + file_entries.push(entry); 253 + total_files += 1; 254 + if reused { 255 + reused_count += 1; 256 + } 257 + } 193 258 194 259 // Process directories recursively (sequentially to avoid 
too much nesting) 195 260 let mut dir_entries = Vec::new(); 196 261 for (name, path) in dir_tasks { 197 - let subdir = build_directory(agent, &path).await?; 262 + // Construct full path for subdirectory 263 + let subdir_path = if current_path.is_empty() { 264 + name.clone() 265 + } else { 266 + format!("{}/{}", current_path, name) 267 + }; 268 + let (subdir, sub_total, sub_reused) = build_directory(agent, &path, existing_blobs, subdir_path).await?; 198 269 dir_entries.push(Entry::new() 199 270 .name(CowStr::from(name)) 200 271 .node(EntryNode::Directory(Box::new(subdir))) 201 272 .build()); 273 + total_files += sub_total; 274 + reused_count += sub_reused; 202 275 } 203 276 204 277 // Combine file and directory entries 205 278 let mut entries = file_entries; 206 279 entries.extend(dir_entries); 207 280 208 - Ok(Directory::new() 281 + let directory = Directory::new() 209 282 .r#type(CowStr::from("directory")) 210 283 .entries(entries) 211 - .build()) 284 + .build(); 285 + 286 + Ok((directory, total_files, reused_count)) 212 287 }) 213 288 } 214 289 215 - /// Process a single file: gzip -> base64 -> upload blob 290 + /// Process a single file: gzip -> base64 -> upload blob (or reuse existing) 291 + /// Returns (File, reused: bool) 292 + /// file_path_key is the full path from the site root (e.g., "config/file.json") for blob map lookup 216 293 async fn process_file( 217 294 agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>, 218 295 file_path: &Path, 219 - ) -> miette::Result<File<'static>> 296 + file_path_key: &str, 297 + existing_blobs: &HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>, 298 + ) -> miette::Result<(File<'static>, bool)> 220 299 { 221 300 // Read file 222 301 let file_data = std::fs::read(file_path).into_diagnostic()?; ··· 234 313 // Base64 encode the gzipped data 235 314 let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes(); 236 315 237 - // Upload blob as octet-stream 316 + // 
Compute CID for this file (CRITICAL: on base64-encoded gzipped content) 317 + let file_cid = cid::compute_cid(&base64_bytes); 318 + 319 + // Check if we have an existing blob with the same CID 320 + let existing_blob = existing_blobs.get(file_path_key); 321 + 322 + if let Some((existing_blob_ref, existing_cid)) = existing_blob { 323 + if existing_cid == &file_cid { 324 + // CIDs match - reuse existing blob 325 + println!(" ✓ Reusing blob for {} (CID: {})", file_path_key, file_cid); 326 + return Ok(( 327 + File::new() 328 + .r#type(CowStr::from("file")) 329 + .blob(existing_blob_ref.clone()) 330 + .encoding(CowStr::from("gzip")) 331 + .mime_type(CowStr::from(original_mime)) 332 + .base64(true) 333 + .build(), 334 + true 335 + )); 336 + } 337 + } 338 + 339 + // File is new or changed - upload it 340 + println!(" ↑ Uploading {} ({} bytes, CID: {})", file_path_key, base64_bytes.len(), file_cid); 238 341 let blob = agent.upload_blob( 239 342 base64_bytes, 240 343 MimeType::new_static("application/octet-stream"), 241 344 ).await?; 242 345 243 - Ok(File::new() 244 - .r#type(CowStr::from("file")) 245 - .blob(blob) 246 - .encoding(CowStr::from("gzip")) 247 - .mime_type(CowStr::from(original_mime)) 248 - .base64(true) 249 - .build()) 346 + Ok(( 347 + File::new() 348 + .r#type(CowStr::from("file")) 349 + .blob(blob) 350 + .encoding(CowStr::from("gzip")) 351 + .mime_type(CowStr::from(original_mime)) 352 + .base64(true) 353 + .build(), 354 + false 355 + )) 250 356 } 251 357 252 - /// Count total files in a directory tree 253 - fn count_files(dir: &Directory) -> usize { 254 - let mut count = 0; 255 - for entry in &dir.entries { 256 - match &entry.node { 257 - EntryNode::File(_) => count += 1, 258 - EntryNode::Directory(subdir) => count += count_files(subdir), 259 - _ => {} // Unknown variants 260 - } 261 - } 262 - count 263 - }

History

3 rounds 0 comments
sign up or log in to add to the discussion
6 commits
expand
9a803381
init support for redirects file
f1f70b3b
Add support for existing blob reuse in deployment process
56b1ef45
dont normalize paths when comparing CIDs
38b1c4c6
add pull and serve to cli
436d7a06
remove jacquard submodule
122e18dd
update flake
expand 0 comments
closed without merging
2 commits
expand
f1f70b3b
Add support for existing blob reuse in deployment process
56b1ef45
dont normalize paths when comparing CIDs
expand 0 comments
2 commits
expand
9a803381
init support for redirects file
f1f70b3b
Add support for existing blob reuse in deployment process
expand 0 comments