Monorepo for wisp.place. A static site hosting service built on top of the AT Protocol. wisp.place

rudimentary _redirects support, incremental uploading for cli #3

Closed · opened by nekomimi.pet, targeting main

TODO: `_headers` file; `place.wisp.settings` lexicon as a lexicon-based way of configuring this

Labels

None yet.

Assignee

None yet.

Participants 1
AT URI
at://did:plc:ttdrpj45ibqunmfhdsb4zdwq/sh.tangled.repo.pull/3m5htcylipq22
+298 -46
Diff #1
+1
cli/.gitignore
··· 1 + test/ 1 2 .DS_STORE 2 3 jacquard/ 3 4 binaries/
+3
cli/Cargo.lock
··· 4385 4385 "jacquard-oauth", 4386 4386 "miette", 4387 4387 "mime_guess", 4388 + "multibase", 4389 + "multihash", 4388 4390 "reqwest", 4389 4391 "rustversion", 4390 4392 "serde", 4391 4393 "serde_json", 4394 + "sha2", 4392 4395 "shellexpand", 4393 4396 "tokio", 4394 4397 "walkdir",
+3
cli/Cargo.toml
··· 30 30 mime_guess = "2.0" 31 31 bytes = "1.10" 32 32 futures = "0.3.31" 33 + multihash = "0.19.3" 34 + multibase = "0.9" 35 + sha2 = "0.10"
+85
cli/src/blob_map.rs
··· 1 + use jacquard_common::types::blob::BlobRef; 2 + use jacquard_common::IntoStatic; 3 + use std::collections::HashMap; 4 + 5 + use crate::place_wisp::fs::{Directory, EntryNode}; 6 + 7 + /// Extract blob information from a directory tree 8 + /// Returns a map of file paths to their blob refs and CIDs 9 + /// 10 + /// This mirrors the TypeScript implementation in src/lib/wisp-utils.ts lines 275-302 11 + pub fn extract_blob_map( 12 + directory: &Directory, 13 + ) -> HashMap<String, (BlobRef<'static>, String)> { 14 + extract_blob_map_recursive(directory, String::new()) 15 + } 16 + 17 + fn extract_blob_map_recursive( 18 + directory: &Directory, 19 + current_path: String, 20 + ) -> HashMap<String, (BlobRef<'static>, String)> { 21 + let mut blob_map = HashMap::new(); 22 + 23 + for entry in &directory.entries { 24 + let full_path = if current_path.is_empty() { 25 + entry.name.to_string() 26 + } else { 27 + format!("{}/{}", current_path, entry.name) 28 + }; 29 + 30 + match &entry.node { 31 + EntryNode::File(file_node) => { 32 + // Extract CID from blob ref 33 + // BlobRef is an enum with Blob variant, which has a ref field (CidLink) 34 + let blob_ref = &file_node.blob; 35 + let cid_string = blob_ref.blob().r#ref.to_string(); 36 + 37 + // Store with full path (mirrors TypeScript implementation) 38 + blob_map.insert( 39 + full_path, 40 + (blob_ref.clone().into_static(), cid_string) 41 + ); 42 + } 43 + EntryNode::Directory(subdir) => { 44 + let sub_map = extract_blob_map_recursive(subdir, full_path); 45 + blob_map.extend(sub_map); 46 + } 47 + EntryNode::Unknown(_) => { 48 + // Skip unknown node types 49 + } 50 + } 51 + } 52 + 53 + blob_map 54 + } 55 + 56 + /// Normalize file path by removing base folder prefix 57 + /// Example: "cobblemon/index.html" -> "index.html" 58 + /// 59 + /// Note: This function is kept for reference but is no longer used in production code. 
60 + /// The TypeScript server has a similar normalization (src/routes/wisp.ts line 291) to handle 61 + /// uploads that include a base folder prefix, but our CLI doesn't need this since we 62 + /// track full paths consistently. 63 + #[allow(dead_code)] 64 + pub fn normalize_path(path: &str) -> String { 65 + // Remove base folder prefix (everything before first /) 66 + if let Some(idx) = path.find('/') { 67 + path[idx + 1..].to_string() 68 + } else { 69 + path.to_string() 70 + } 71 + } 72 + 73 + #[cfg(test)] 74 + mod tests { 75 + use super::*; 76 + 77 + #[test] 78 + fn test_normalize_path() { 79 + assert_eq!(normalize_path("index.html"), "index.html"); 80 + assert_eq!(normalize_path("cobblemon/index.html"), "index.html"); 81 + assert_eq!(normalize_path("folder/subfolder/file.txt"), "subfolder/file.txt"); 82 + assert_eq!(normalize_path("a/b/c/d.txt"), "b/c/d.txt"); 83 + } 84 + } 85 +
+66
cli/src/cid.rs
··· 1 + use jacquard_common::types::cid::IpldCid; 2 + use sha2::{Digest, Sha256}; 3 + 4 + /// Compute CID (Content Identifier) for blob content 5 + /// Uses the same algorithm as AT Protocol: CIDv1 with raw codec (0x55) and SHA-256 6 + /// 7 + /// CRITICAL: This must be called on BASE64-ENCODED GZIPPED content, not just gzipped content 8 + /// 9 + /// Based on @atproto/common/src/ipld.ts sha256RawToCid implementation 10 + pub fn compute_cid(content: &[u8]) -> String { 11 + // Use node crypto to compute sha256 hash (same as AT Protocol) 12 + let hash = Sha256::digest(content); 13 + 14 + // Create multihash (code 0x12 = sha2-256) 15 + let multihash = multihash::Multihash::wrap(0x12, &hash) 16 + .expect("SHA-256 hash should always fit in multihash"); 17 + 18 + // Create CIDv1 with raw codec (0x55) 19 + let cid = IpldCid::new_v1(0x55, multihash); 20 + 21 + // Convert to base32 string representation 22 + cid.to_string_of_base(multibase::Base::Base32Lower) 23 + .unwrap_or_else(|_| cid.to_string()) 24 + } 25 + 26 + #[cfg(test)] 27 + mod tests { 28 + use super::*; 29 + use base64::Engine; 30 + 31 + #[test] 32 + fn test_compute_cid() { 33 + // Test with a simple string: "hello" 34 + let content = b"hello"; 35 + let cid = compute_cid(content); 36 + 37 + // CID should start with 'baf' for raw codec base32 38 + assert!(cid.starts_with("baf")); 39 + } 40 + 41 + #[test] 42 + fn test_compute_cid_base64_encoded() { 43 + // Simulate the actual use case: gzipped then base64 encoded 44 + use flate2::write::GzEncoder; 45 + use flate2::Compression; 46 + use std::io::Write; 47 + 48 + let original = b"hello world"; 49 + 50 + // Gzip compress 51 + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); 52 + encoder.write_all(original).unwrap(); 53 + let gzipped = encoder.finish().unwrap(); 54 + 55 + // Base64 encode the gzipped data 56 + let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes(); 57 + 58 + // Compute CID on the base64 bytes 59 + let 
cid = compute_cid(&base64_bytes); 60 + 61 + // Should be a valid CID 62 + assert!(cid.starts_with("baf")); 63 + assert!(cid.len() > 10); 64 + } 65 + } 66 +
+140 -46
cli/src/main.rs
··· 1 1 mod builder_types; 2 2 mod place_wisp; 3 + mod cid; 4 + mod blob_map; 3 5 4 6 use clap::Parser; 5 7 use jacquard::CowStr; 6 - use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession}; 8 + use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession, AgentSession}; 7 9 use jacquard::oauth::client::OAuthClient; 8 10 use jacquard::oauth::loopback::LoopbackConfig; 9 11 use jacquard::prelude::IdentityResolver; ··· 11 13 use jacquard_common::types::blob::MimeType; 12 14 use miette::IntoDiagnostic; 13 15 use std::path::{Path, PathBuf}; 16 + use std::collections::HashMap; 14 17 use flate2::Compression; 15 18 use flate2::write::GzEncoder; 16 19 use std::io::Write; ··· 107 110 108 111 println!("Deploying site '{}'...", site_name); 109 112 113 + // Try to fetch existing manifest for incremental updates 114 + let existing_blob_map: HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)> = { 115 + use jacquard_common::types::string::AtUri; 116 + 117 + // Get the DID for this session 118 + let session_info = agent.session_info().await; 119 + if let Some((did, _)) = session_info { 120 + // Construct the AT URI for the record 121 + let uri_string = format!("at://{}/place.wisp.fs/{}", did, site_name); 122 + if let Ok(uri) = AtUri::new(&uri_string) { 123 + match agent.get_record::<Fs>(&uri).await { 124 + Ok(response) => { 125 + match response.into_output() { 126 + Ok(record_output) => { 127 + let existing_manifest = record_output.value; 128 + let blob_map = blob_map::extract_blob_map(&existing_manifest.root); 129 + println!("Found existing manifest with {} files, checking for changes...", blob_map.len()); 130 + blob_map 131 + } 132 + Err(_) => { 133 + println!("No existing manifest found, uploading all files..."); 134 + HashMap::new() 135 + } 136 + } 137 + } 138 + Err(_) => { 139 + // Record doesn't exist yet - this is a new site 140 + println!("No existing manifest found, uploading all files..."); 141 
+ HashMap::new() 142 + } 143 + } 144 + } else { 145 + println!("No existing manifest found (invalid URI), uploading all files..."); 146 + HashMap::new() 147 + } 148 + } else { 149 + println!("No existing manifest found (could not get DID), uploading all files..."); 150 + HashMap::new() 151 + } 152 + }; 153 + 110 154 // Build directory tree 111 - let root_dir = build_directory(agent, &path).await?; 155 + let (root_dir, total_files, reused_count) = build_directory(agent, &path, &existing_blob_map, String::new()).await?; 156 + let uploaded_count = total_files - reused_count; 112 157 113 - // Count total files 114 - let file_count = count_files(&root_dir); 115 - 116 158 // Create the Fs record 117 159 let fs_record = Fs::new() 118 160 .site(CowStr::from(site_name.clone())) 119 161 .root(root_dir) 120 - .file_count(file_count as i64) 162 + .file_count(total_files as i64) 121 163 .created_at(Datetime::now()) 122 164 .build(); 123 165 ··· 132 174 .and_then(|s| s.split('/').next()) 133 175 .ok_or_else(|| miette::miette!("Failed to parse DID from URI"))?; 134 176 135 - println!("Deployed site '{}': {}", site_name, output.uri); 136 - println!("Available at: https://sites.wisp.place/{}/{}", did, site_name); 177 + println!("\n✓ Deployed site '{}': {}", site_name, output.uri); 178 + println!(" Total files: {} ({} reused, {} uploaded)", total_files, reused_count, uploaded_count); 179 + println!(" Available at: https://sites.wisp.place/{}/{}", did, site_name); 137 180 138 181 Ok(()) 139 182 } 140 183 141 - 184 + /// Recursively build a Directory from a filesystem path 185 + /// current_path is the path from the root of the site (e.g., "" for root, "config" for config dir) 142 186 fn build_directory<'a>( 143 187 agent: &'a Agent<impl jacquard::client::AgentSession + IdentityResolver + 'a>, 144 188 dir_path: &'a Path, 145 - ) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<Directory<'static>>> + 'a>> 189 + existing_blobs: &'a HashMap<String, 
(jacquard_common::types::blob::BlobRef<'static>, String)>, 190 + current_path: String, 191 + ) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<(Directory<'static>, usize, usize)>> + 'a>> 146 192 { 147 193 Box::pin(async move { 148 194 // Collect all directory entries first ··· 167 213 168 214 169 215 216 + let metadata = entry.metadata().into_diagnostic()?; 170 217 171 - 172 - 173 - 174 - 175 - 176 - 218 + if metadata.is_file() { 219 + // Construct full path for this file (for blob map lookup) 220 + let full_path = if current_path.is_empty() { 221 + name_str.clone() 222 + } else { 223 + format!("{}/{}", current_path, name_str) 224 + }; 225 + file_tasks.push((name_str, path, full_path)); 226 + } else if metadata.is_dir() { 227 + dir_tasks.push((name_str, path)); 228 + } 177 229 } 178 230 179 231 // Process files concurrently with a limit of 5 180 - let file_entries: Vec<Entry> = stream::iter(file_tasks) 181 - .map(|(name, path)| async move { 182 - let file_node = process_file(agent, &path).await?; 183 - Ok::<_, miette::Report>(Entry::new() 232 + let file_results: Vec<(Entry<'static>, bool)> = stream::iter(file_tasks) 233 + .map(|(name, path, full_path)| async move { 234 + let (file_node, reused) = process_file(agent, &path, &full_path, existing_blobs).await?; 235 + let entry = Entry::new() 184 236 .name(CowStr::from(name)) 185 237 .node(EntryNode::File(Box::new(file_node))) 186 - .build()) 238 + .build(); 239 + Ok::<_, miette::Report>((entry, reused)) 187 240 }) 188 241 .buffer_unordered(5) 189 242 .collect::<Vec<_>>() 190 243 .await 191 244 .into_iter() 192 245 .collect::<miette::Result<Vec<_>>>()?; 246 + 247 + let mut file_entries = Vec::new(); 248 + let mut reused_count = 0; 249 + let mut total_files = 0; 250 + 251 + for (entry, reused) in file_results { 252 + file_entries.push(entry); 253 + total_files += 1; 254 + if reused { 255 + reused_count += 1; 256 + } 257 + } 193 258 194 259 // Process directories recursively (sequentially to avoid 
too much nesting) 195 260 let mut dir_entries = Vec::new(); 196 261 for (name, path) in dir_tasks { 197 - let subdir = build_directory(agent, &path).await?; 262 + // Construct full path for subdirectory 263 + let subdir_path = if current_path.is_empty() { 264 + name.clone() 265 + } else { 266 + format!("{}/{}", current_path, name) 267 + }; 268 + let (subdir, sub_total, sub_reused) = build_directory(agent, &path, existing_blobs, subdir_path).await?; 198 269 dir_entries.push(Entry::new() 199 270 .name(CowStr::from(name)) 200 271 .node(EntryNode::Directory(Box::new(subdir))) 201 272 .build()); 273 + total_files += sub_total; 274 + reused_count += sub_reused; 202 275 } 203 276 204 277 // Combine file and directory entries 205 278 let mut entries = file_entries; 206 279 entries.extend(dir_entries); 207 280 208 - Ok(Directory::new() 281 + let directory = Directory::new() 209 282 .r#type(CowStr::from("directory")) 210 283 .entries(entries) 211 - .build()) 284 + .build(); 285 + 286 + Ok((directory, total_files, reused_count)) 212 287 }) 213 288 } 214 289 215 - /// Process a single file: gzip -> base64 -> upload blob 290 + /// Process a single file: gzip -> base64 -> upload blob (or reuse existing) 291 + /// Returns (File, reused: bool) 292 + /// file_path_key is the full path from the site root (e.g., "config/file.json") for blob map lookup 216 293 async fn process_file( 217 294 agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>, 218 295 file_path: &Path, 219 - ) -> miette::Result<File<'static>> 296 + file_path_key: &str, 297 + existing_blobs: &HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>, 298 + ) -> miette::Result<(File<'static>, bool)> 220 299 { 221 300 // Read file 222 301 let file_data = std::fs::read(file_path).into_diagnostic()?; ··· 234 313 // Base64 encode the gzipped data 235 314 let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes(); 236 315 237 - // Upload blob as octet-stream 316 + // 
Compute CID for this file (CRITICAL: on base64-encoded gzipped content) 317 + let file_cid = cid::compute_cid(&base64_bytes); 318 + 319 + // Check if we have an existing blob with the same CID 320 + let existing_blob = existing_blobs.get(file_path_key); 321 + 322 + if let Some((existing_blob_ref, existing_cid)) = existing_blob { 323 + if existing_cid == &file_cid { 324 + // CIDs match - reuse existing blob 325 + println!(" ✓ Reusing blob for {} (CID: {})", file_path_key, file_cid); 326 + return Ok(( 327 + File::new() 328 + .r#type(CowStr::from("file")) 329 + .blob(existing_blob_ref.clone()) 330 + .encoding(CowStr::from("gzip")) 331 + .mime_type(CowStr::from(original_mime)) 332 + .base64(true) 333 + .build(), 334 + true 335 + )); 336 + } 337 + } 338 + 339 + // File is new or changed - upload it 340 + println!(" ↑ Uploading {} ({} bytes, CID: {})", file_path_key, base64_bytes.len(), file_cid); 238 341 let blob = agent.upload_blob( 239 342 base64_bytes, 240 343 MimeType::new_static("application/octet-stream"), 241 344 ).await?; 242 345 243 - Ok(File::new() 244 - .r#type(CowStr::from("file")) 245 - .blob(blob) 246 - .encoding(CowStr::from("gzip")) 247 - .mime_type(CowStr::from(original_mime)) 248 - .base64(true) 249 - .build()) 346 + Ok(( 347 + File::new() 348 + .r#type(CowStr::from("file")) 349 + .blob(blob) 350 + .encoding(CowStr::from("gzip")) 351 + .mime_type(CowStr::from(original_mime)) 352 + .base64(true) 353 + .build(), 354 + false 355 + )) 250 356 } 251 357 252 - /// Count total files in a directory tree 253 - fn count_files(dir: &Directory) -> usize { 254 - let mut count = 0; 255 - for entry in &dir.entries { 256 - match &entry.node { 257 - EntryNode::File(_) => count += 1, 258 - EntryNode::Directory(subdir) => count += count_files(subdir), 259 - _ => {} // Unknown variants 260 - } 261 - } 262 - count 263 - }

History

3 rounds 0 comments
sign up or log in to add to the discussion
6 commits
expand
9a803381
init support for redirects file
f1f70b3b
Add support for existing blob reuse in deployment process
56b1ef45
dont normalize paths when comparing CIDs
38b1c4c6
add pull and serve to cli
436d7a06
remove jacquard submodule
122e18dd
update flake
expand 0 comments
closed without merging
2 commits
expand
f1f70b3b
Add support for existing blob reuse in deployment process
56b1ef45
dont normalize paths when comparing CIDs
expand 0 comments
2 commits
expand
9a803381
init support for redirects file
f1f70b3b
Add support for existing blob reuse in deployment process
expand 0 comments