A human-friendly DSL for ATProto Lexicons

Lockfile

+816 -139
+98 -5
mlf-cli/src/config.rs
··· 1 1 use serde::{Deserialize, Serialize}; 2 + use std::collections::HashMap; 2 3 use std::path::{Path, PathBuf}; 3 4 use thiserror::Error; 4 5 ··· 54 55 pub struct DependenciesConfig { 55 56 #[serde(default)] 56 57 pub dependencies: Vec<String>, 58 + 59 + #[serde(default = "default_allow_transitive_deps")] 60 + pub allow_transitive_deps: bool, 61 + 62 + #[serde(default = "default_optimize_transitive_fetches")] 63 + pub optimize_transitive_fetches: bool, 64 + } 65 + 66 + fn default_allow_transitive_deps() -> bool { 67 + true 68 + } 69 + 70 + fn default_optimize_transitive_fetches() -> bool { 71 + false 57 72 } 58 73 59 74 impl Default for DependenciesConfig { 60 75 fn default() -> Self { 61 76 Self { 62 77 dependencies: vec![], 78 + allow_transitive_deps: default_allow_transitive_deps(), 79 + optimize_transitive_fetches: default_optimize_transitive_fetches(), 63 80 } 64 81 } 65 82 } ··· 136 153 std::fs::write(&gitignore_path, "*\n!.gitignore\n")?; 137 154 } 138 155 139 - // Create or touch .lexicon-cache.toml 140 - let cache_file = mlf_dir.join(".lexicon-cache.toml"); 141 - if !cache_file.exists() { 142 - std::fs::write(&cache_file, "# Lexicon cache metadata\n")?; 156 + Ok(()) 157 + } 158 + 159 + /// Lock file format for tracking resolved lexicons 160 + #[derive(Debug, Serialize, Deserialize, Default)] 161 + pub struct LockFile { 162 + /// Lock file format version 163 + pub version: u32, 164 + 165 + /// All resolved lexicons (both direct and transitive dependencies) 166 + #[serde(default)] 167 + pub lexicons: HashMap<String, LockedLexicon>, 168 + } 169 + 170 + /// A single locked lexicon entry 171 + #[derive(Debug, Clone, Serialize, Deserialize)] 172 + pub struct LockedLexicon { 173 + /// The NSID of this lexicon 174 + pub nsid: String, 175 + 176 + /// The DID of the repository this was fetched from 177 + pub did: String, 178 + 179 + /// SHA-256 checksum of the JSON content 180 + pub checksum: String, 181 + 182 + /// List of NSIDs this lexicon depends on 
(external references) 183 + #[serde(default, skip_serializing_if = "Vec::is_empty")] 184 + pub dependencies: Vec<String>, 185 + } 186 + 187 + impl LockFile { 188 + pub fn new() -> Self { 189 + Self { 190 + version: 1, 191 + lexicons: HashMap::new(), 192 + } 143 193 } 144 194 145 - Ok(()) 195 + pub fn load(path: &Path) -> Result<Self, ConfigError> { 196 + if !path.exists() { 197 + return Ok(Self::new()); 198 + } 199 + 200 + let content = std::fs::read_to_string(path)?; 201 + toml::from_str(&content).map_err(|e| ConfigError::ParseError(e)) 202 + } 203 + 204 + pub fn save(&self, path: &Path) -> Result<(), ConfigError> { 205 + let content = toml::to_string_pretty(self) 206 + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; 207 + std::fs::write(path, content)?; 208 + Ok(()) 209 + } 210 + 211 + pub fn add_lexicon(&mut self, nsid: String, did: String, checksum: String, dependencies: Vec<String>) { 212 + self.lexicons.insert(nsid.clone(), LockedLexicon { 213 + nsid, 214 + did, 215 + checksum, 216 + dependencies, 217 + }); 218 + } 146 219 } 147 220 148 221 #[cfg(test)] ··· 155 228 assert_eq!(config.source.directory, "./lexicons"); 156 229 assert!(config.output.is_empty()); 157 230 assert!(config.dependencies.dependencies.is_empty()); 231 + } 232 + 233 + #[test] 234 + fn test_lockfile_basic() { 235 + let mut lockfile = LockFile::new(); 236 + assert_eq!(lockfile.version, 1); 237 + assert!(lockfile.lexicons.is_empty()); 238 + 239 + lockfile.add_lexicon( 240 + "app.bsky.actor.profile".to_string(), 241 + "did:plc:test".to_string(), 242 + "sha256:abc123".to_string(), 243 + vec![], 244 + ); 245 + 246 + assert_eq!(lockfile.lexicons.len(), 1); 247 + let locked = lockfile.lexicons.get("app.bsky.actor.profile").unwrap(); 248 + assert_eq!(locked.nsid, "app.bsky.actor.profile"); 249 + assert_eq!(locked.did, "did:plc:test"); 250 + assert_eq!(locked.checksum, "sha256:abc123"); 158 251 } 159 252 }
+520 -75
mlf-cli/src/fetch.rs
··· 1 - use crate::config::{find_project_root, get_mlf_cache_dir, init_mlf_cache, ConfigError, MlfConfig}; 2 - use chrono::{DateTime, Utc}; 1 + use crate::config::{find_project_root, get_mlf_cache_dir, init_mlf_cache, ConfigError, MlfConfig, LockFile}; 3 2 use hickory_resolver::config::*; 4 3 use hickory_resolver::Resolver; 5 4 use miette::Diagnostic; 6 - use serde::{Deserialize, Serialize}; 5 + use serde::Deserialize; 7 6 use sha2::{Digest, Sha256}; 8 - use std::collections::HashMap; 9 - use std::path::Path; 7 + use std::collections::HashSet; 10 8 use thiserror::Error; 11 9 12 10 #[derive(Error, Debug, Diagnostic)] ··· 43 41 #[diagnostic(code(mlf::fetch::io_error))] 44 42 IoError(#[from] std::io::Error), 45 43 46 - #[error("Failed to load cache: {0}")] 47 - #[diagnostic(code(mlf::fetch::cache_error))] 48 - CacheError(String), 49 - 50 44 #[error("Invalid NSID format: {0}")] 51 45 #[diagnostic(code(mlf::fetch::invalid_nsid))] 52 46 InvalidNsid(String), 53 47 } 54 48 55 - #[derive(Debug, Serialize, Deserialize)] 56 - pub struct LexiconCache { 57 - #[serde(default)] 58 - pub lexicons: HashMap<String, CacheEntry>, 59 - } 60 - 61 - #[derive(Debug, Serialize, Deserialize, Clone)] 62 - pub struct CacheEntry { 63 - pub nsid: String, 64 - pub fetched_at: DateTime<Utc>, 65 - pub did: String, 66 - #[serde(default)] 67 - pub hash: String, 68 - } 69 - 70 - impl LexiconCache { 71 - pub fn load(path: &Path) -> Result<Self, FetchError> { 72 - if !path.exists() { 73 - return Ok(Self { 74 - lexicons: HashMap::new(), 75 - }); 76 - } 77 - 78 - let content = std::fs::read_to_string(path)?; 79 - toml::from_str(&content).map_err(|e| FetchError::CacheError(e.to_string())) 80 - } 81 - 82 - pub fn save(&self, path: &Path) -> Result<(), FetchError> { 83 - let content = 84 - toml::to_string_pretty(self).map_err(|e| FetchError::CacheError(e.to_string()))?; 85 - std::fs::write(path, content)?; 86 - Ok(()) 87 - } 88 - 89 - pub fn add_entry(&mut self, nsid: String, did: String, hash: String) { 90 
- self.lexicons.insert( 91 - nsid.clone(), 92 - CacheEntry { 93 - nsid, 94 - fetched_at: Utc::now(), 95 - did, 96 - hash, 97 - }, 98 - ); 99 - } 100 - } 101 49 102 50 #[derive(Debug, Deserialize)] 103 51 struct AtProtoRecord { ··· 106 54 } 107 55 108 56 /// Main entry point for fetch command 109 - pub fn run_fetch(nsid: Option<String>, save: bool) -> Result<(), FetchError> { 57 + pub fn run_fetch(nsid: Option<String>, save: bool, update: bool, locked: bool) -> Result<(), FetchError> { 58 + // Validate flags 59 + if update && locked { 60 + return Err(FetchError::HttpError( 61 + "Cannot use --update and --locked together".to_string() 62 + )); 63 + } 64 + 110 65 // Find project root 111 66 let current_dir = std::env::current_dir()?; 112 67 let project_root = ensure_project_root(&current_dir)?; ··· 125 80 } 126 81 None => { 127 82 // Fetch all dependencies from mlf.toml 128 - fetch_all_dependencies(&project_root) 83 + fetch_all_dependencies(&project_root, update, locked) 129 84 } 130 85 } 131 86 } ··· 156 111 } 157 112 } 158 113 159 - fn fetch_all_dependencies(project_root: &std::path::Path) -> Result<(), FetchError> { 114 + fn fetch_all_dependencies(project_root: &std::path::Path, update: bool, locked: bool) -> Result<(), FetchError> { 160 115 // Load mlf.toml 161 116 let config_path = project_root.join("mlf.toml"); 162 117 let config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?; ··· 166 121 return Ok(()); 167 122 } 168 123 169 - println!("Fetching {} dependencies...", config.dependencies.dependencies.len()); 124 + let allow_transitive = config.dependencies.allow_transitive_deps; 125 + 126 + // Load or create lockfile 127 + let lockfile_path = project_root.join("mlf-lock.toml"); 128 + let existing_lockfile = LockFile::load(&lockfile_path).map_err(FetchError::NoProjectRoot)?; 129 + let has_existing_lockfile = lockfile_path.exists() && !existing_lockfile.lexicons.is_empty(); 130 + 131 + // Handle --locked mode 132 + if locked { 133 + if 
!has_existing_lockfile { 134 + return Err(FetchError::HttpError( 135 + "No lockfile found. Run `mlf fetch` first to create mlf-lock.toml".to_string() 136 + )); 137 + } 138 + 139 + // In locked mode, we use the lockfile and verify nothing needs updating 140 + // For now, we'll just use the lockfile - verification can be enhanced later 141 + println!("Using locked dependencies from mlf-lock.toml"); 142 + return fetch_from_lockfile(project_root, &existing_lockfile); 143 + } 144 + 145 + // Determine fetch mode 146 + let mode = if update { 147 + "update (ignoring lockfile)" 148 + } else if has_existing_lockfile { 149 + "lockfile" 150 + } else { 151 + "fresh" 152 + }; 153 + 154 + println!("Fetching {} dependencies... (mode: {}, transitive deps: {})", 155 + config.dependencies.dependencies.len(), 156 + mode, 157 + if allow_transitive { "enabled" } else { "disabled" }); 158 + 159 + // In update mode or if no lockfile, do full fetch 160 + // In normal mode with lockfile, use lockfile for cached entries 161 + let mut lockfile = if update || !has_existing_lockfile { 162 + LockFile::new() 163 + } else { 164 + existing_lockfile 165 + }; 170 166 171 167 let mut errors = Vec::new(); 172 168 let mut success_count = 0; 169 + let mut fetched_nsids = HashSet::new(); 173 170 171 + // Fetch initial dependencies 174 172 for dep in &config.dependencies.dependencies { 175 173 println!("\nFetching: {}", dep); 176 - match fetch_lexicon(dep, project_root) { 174 + match fetch_lexicon_with_lock(dep, project_root, &mut lockfile) { 177 175 Ok(()) => { 178 176 success_count += 1; 177 + fetched_nsids.insert(dep.clone()); 179 178 } 180 179 Err(e) => { 181 180 errors.push((dep.clone(), format!("{}", e))); ··· 183 182 } 184 183 } 185 184 185 + // If transitive dependencies are enabled, iteratively fetch missing deps 186 + if allow_transitive { 187 + let mut iteration = 0; 188 + let max_iterations = 10; // Prevent infinite loops 189 + 190 + loop { 191 + iteration += 1; 192 + if iteration > 
max_iterations { 193 + eprintln!("\nWarning: Reached maximum iteration limit for transitive dependencies"); 194 + break; 195 + } 196 + 197 + // Collect unresolved references 198 + let unresolved = match collect_unresolved_references(project_root) { 199 + Ok(refs) => refs, 200 + Err(e) => { 201 + eprintln!("\nWarning: Failed to analyze dependencies: {}", e); 202 + break; 203 + } 204 + }; 205 + 206 + // Filter out NSIDs we've already fetched or tried to fetch 207 + let new_deps: HashSet<String> = unresolved 208 + .into_iter() 209 + .filter(|nsid| !fetched_nsids.contains(nsid)) 210 + .collect(); 211 + 212 + if new_deps.is_empty() { 213 + break; 214 + } 215 + 216 + // Determine whether to optimize transitive fetches 217 + let should_optimize = config.dependencies.optimize_transitive_fetches; 218 + 219 + if should_optimize { 220 + // Optimize the fetch patterns to reduce number of fetches 221 + let optimized_patterns = optimize_fetch_patterns(&new_deps); 222 + 223 + println!("\n→ Found {} unresolved reference(s), fetching {} optimized pattern(s)...", 224 + new_deps.len(), optimized_patterns.len()); 225 + 226 + // Track which patterns are wildcards and their constituent NSIDs 227 + let mut wildcard_failures: Vec<(String, Vec<String>)> = Vec::new(); 228 + 229 + for pattern in optimized_patterns { 230 + let is_wildcard = pattern.ends_with(".*"); 231 + println!("\nFetching transitive dependency: {}", pattern); 232 + fetched_nsids.insert(pattern.clone()); 233 + 234 + match fetch_lexicon_with_lock(&pattern, project_root, &mut lockfile) { 235 + Ok(()) => { 236 + success_count += 1; 237 + } 238 + Err(e) => { 239 + eprintln!(" Warning: Failed to fetch {}: {}", pattern, e); 240 + 241 + // If this was a wildcard that failed, collect the individual NSIDs for retry 242 + if is_wildcard { 243 + let pattern_prefix = pattern.strip_suffix(".*").unwrap(); 244 + let matching_nsids: Vec<String> = new_deps.iter() 245 + .filter(|nsid| nsid.starts_with(pattern_prefix)) 246 + .cloned() 247 + 
.collect(); 248 + 249 + if !matching_nsids.is_empty() { 250 + wildcard_failures.push((pattern.clone(), matching_nsids)); 251 + } 252 + } 253 + } 254 + } 255 + } 256 + 257 + // Retry failed wildcards with individual NSIDs 258 + if !wildcard_failures.is_empty() { 259 + println!("\n→ Retrying failed wildcard patterns with individual NSIDs..."); 260 + 261 + for (failed_pattern, nsids) in wildcard_failures { 262 + println!(" Retrying {} NSIDs from failed pattern: {}", nsids.len(), failed_pattern); 263 + 264 + for nsid in nsids { 265 + if !fetched_nsids.contains(&nsid) { 266 + println!(" Fetching: {}", nsid); 267 + fetched_nsids.insert(nsid.clone()); 268 + 269 + match fetch_lexicon_with_lock(&nsid, project_root, &mut lockfile) { 270 + Ok(()) => { 271 + success_count += 1; 272 + } 273 + Err(e) => { 274 + eprintln!(" Warning: Failed to fetch {}: {}", nsid, e); 275 + } 276 + } 277 + } 278 + } 279 + } 280 + } 281 + } else { 282 + // Fetch individually without optimization (safer, more predictable) 283 + println!("\n→ Found {} unresolved reference(s), fetching individually...", 284 + new_deps.len()); 285 + 286 + for nsid in &new_deps { 287 + println!("\nFetching transitive dependency: {}", nsid); 288 + fetched_nsids.insert(nsid.clone()); 289 + 290 + match fetch_lexicon_with_lock(nsid, project_root, &mut lockfile) { 291 + Ok(()) => { 292 + success_count += 1; 293 + } 294 + Err(e) => { 295 + // Don't fail the entire fetch for transitive deps 296 + eprintln!(" Warning: Failed to fetch {}: {}", nsid, e); 297 + } 298 + } 299 + } 300 + } 301 + } 302 + } 303 + 304 + // Save the lockfile 305 + lockfile.save(&lockfile_path).map_err(FetchError::NoProjectRoot)?; 306 + println!("\n→ Updated mlf-lock.toml"); 307 + 186 308 if !errors.is_empty() { 187 309 eprintln!( 188 310 "\n{} dependency(ies) fetched successfully, {} error(s):", ··· 202 324 Ok(()) 203 325 } 204 326 327 + /// Fetch dependencies using the lockfile 328 + /// This refetches each lexicon from its recorded DID and verifies the 
checksum 329 + fn fetch_from_lockfile(project_root: &std::path::Path, lockfile: &LockFile) -> Result<(), FetchError> { 330 + if lockfile.lexicons.is_empty() { 331 + println!("Lockfile is empty"); 332 + return Ok(()); 333 + } 334 + 335 + println!("Fetching {} lexicon(s) from lockfile...", lockfile.lexicons.len()); 336 + 337 + let mut errors = Vec::new(); 338 + let mut success_count = 0; 339 + 340 + // Fetch each lexicon from its DID 341 + for (nsid, locked) in &lockfile.lexicons { 342 + println!("\nRefetching: {}", nsid); 343 + 344 + // Fetch the lexicon using the DID from lockfile 345 + match fetch_specific_lexicon(nsid, &locked.did, &locked.checksum, project_root) { 346 + Ok(()) => { 347 + success_count += 1; 348 + } 349 + Err(e) => { 350 + errors.push((nsid.clone(), format!("{}", e))); 351 + } 352 + } 353 + } 354 + 355 + if !errors.is_empty() { 356 + eprintln!( 357 + "\n{} lexicon(s) fetched successfully, {} error(s):", 358 + success_count, 359 + errors.len() 360 + ); 361 + for (nsid, error) in &errors { 362 + eprintln!(" {} - {}", nsid, error); 363 + } 364 + return Err(FetchError::HttpError(format!( 365 + "Failed to fetch {} lexicons", 366 + errors.len() 367 + ))); 368 + } 369 + 370 + println!("\n✓ Successfully fetched all {} lexicons", success_count); 371 + Ok(()) 372 + } 373 + 374 + /// Fetch a specific lexicon by NSID from a known DID, verifying checksum 375 + fn fetch_specific_lexicon( 376 + nsid: &str, 377 + did: &str, 378 + expected_checksum: &str, 379 + project_root: &std::path::Path, 380 + ) -> Result<(), FetchError> { 381 + // Initialize .mlf directory 382 + init_mlf_cache(project_root).map_err(FetchError::InitFailed)?; 383 + let mlf_dir = get_mlf_cache_dir(project_root); 384 + 385 + // Fetch records from the DID 386 + let records = fetch_lexicon_records(did)?; 387 + 388 + // Find the specific NSID 389 + for record in records { 390 + let record_nsid = extract_nsid_from_record(&record)?; 391 + 392 + if record_nsid == nsid { 393 + // Found it! 
Process and verify checksum 394 + let json_str = serde_json::to_string_pretty(&record.value)?; 395 + let hash = calculate_hash(&json_str); 396 + 397 + if hash != expected_checksum { 398 + return Err(FetchError::HttpError(format!( 399 + "Checksum mismatch for {}: expected {}, got {}", 400 + nsid, expected_checksum, hash 401 + ))); 402 + } 403 + 404 + // Save JSON 405 + let mut json_path = mlf_dir.join("lexicons/json"); 406 + for segment in nsid.split('.') { 407 + json_path.push(segment); 408 + } 409 + json_path.set_extension("json"); 410 + 411 + if let Some(parent) = json_path.parent() { 412 + std::fs::create_dir_all(parent)?; 413 + } 414 + std::fs::write(&json_path, &json_str)?; 415 + println!(" → Saved JSON (checksum verified)"); 416 + 417 + // Convert to MLF 418 + let mlf_content = crate::generate::mlf::generate_mlf_from_json(&record.value) 419 + .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?; 420 + 421 + let mut mlf_path = mlf_dir.join("lexicons/mlf"); 422 + for segment in nsid.split('.') { 423 + mlf_path.push(segment); 424 + } 425 + mlf_path.set_extension("mlf"); 426 + 427 + if let Some(parent) = mlf_path.parent() { 428 + std::fs::create_dir_all(parent)?; 429 + } 430 + std::fs::write(&mlf_path, mlf_content)?; 431 + println!(" → Converted to MLF"); 432 + 433 + return Ok(()); 434 + } 435 + } 436 + 437 + Err(FetchError::HttpError(format!( 438 + "Lexicon {} not found in repo {}", 439 + nsid, did 440 + ))) 441 + } 442 + 205 443 fn save_dependency(project_root: &std::path::Path, nsid: &str) -> Result<(), FetchError> { 206 444 let config_path = project_root.join("mlf.toml"); 207 445 let mut config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?; ··· 219 457 } 220 458 221 459 pub fn fetch_lexicon(nsid: &str, project_root: &std::path::Path) -> Result<(), FetchError> { 460 + let mut lockfile = LockFile::new(); 461 + fetch_lexicon_with_lock(nsid, project_root, &mut lockfile) 462 + } 463 + 464 + fn fetch_lexicon_with_lock(nsid: &str, 
project_root: &std::path::Path, lockfile: &mut LockFile) -> Result<(), FetchError> { 222 465 // Initialize .mlf directory 223 466 init_mlf_cache(project_root).map_err(FetchError::InitFailed)?; 224 467 225 468 let mlf_dir = get_mlf_cache_dir(&project_root); 226 - let cache_file = mlf_dir.join(".lexicon-cache.toml"); 227 - let mut cache = LexiconCache::load(&cache_file)?; 228 469 229 470 // Validate NSID format: must be specific (3+ segments) or use wildcard 230 471 validate_nsid_format(nsid)?; ··· 236 477 } else { 237 478 nsid 238 479 }; 239 - 240 - // Check if already cached (for specific NSIDs only) 241 - if !is_wildcard && cache.lexicons.contains_key(nsid) { 242 - println!("Lexicon '{}' is already cached. Skipping fetch.", nsid); 243 - println!(" (Use --force to re-fetch)"); 244 - return Ok(()); 245 - } 246 480 247 481 // Extract authority and name segments from NSID 248 482 // For "app.bsky.actor.profile", authority is "app.bsky", name is "actor.profile" ··· 339 573 // Calculate hash of JSON content 340 574 let hash = calculate_hash(&json_str); 341 575 342 - // Update cache 343 - cache.add_entry(record_nsid.clone(), did.clone(), hash); 576 + // Extract dependencies from JSON 577 + let dependencies = extract_dependencies_from_json(&record.value); 578 + 579 + // Update lockfile 580 + lockfile.add_lexicon(record_nsid.clone(), did.clone(), hash.clone(), dependencies); 344 581 } 345 - 346 - // Save cache 347 - cache.save(&cache_file)?; 348 582 349 583 if processed_count == 0 { 350 584 return Err(FetchError::HttpError(format!( ··· 587 821 fn calculate_hash(content: &str) -> String { 588 822 let mut hasher = Sha256::new(); 589 823 hasher.update(content.as_bytes()); 590 - format!("{:x}", hasher.finalize()) 824 + format!("sha256:{:x}", hasher.finalize()) 825 + } 826 + 827 + /// Extract external references from a lexicon JSON 828 + /// Returns a list of NSIDs that this lexicon depends on 829 + fn extract_dependencies_from_json(json: &serde_json::Value) -> Vec<String> { 
830 + let mut deps = HashSet::new(); 831 + 832 + fn visit_value(value: &serde_json::Value, deps: &mut HashSet<String>) { 833 + match value { 834 + serde_json::Value::Object(map) => { 835 + // Check if this is a ref object 836 + if let Some(ref_val) = map.get("ref") { 837 + if let Some(ref_str) = ref_val.as_str() { 838 + // External refs are multi-segment NSIDs 839 + if ref_str.contains('.') { 840 + deps.insert(ref_str.to_string()); 841 + } 842 + } 843 + } 844 + 845 + // Recurse into all values 846 + for val in map.values() { 847 + visit_value(val, deps); 848 + } 849 + } 850 + serde_json::Value::Array(arr) => { 851 + for val in arr { 852 + visit_value(val, deps); 853 + } 854 + } 855 + _ => {} 856 + } 857 + } 858 + 859 + visit_value(json, &mut deps); 860 + let mut result: Vec<String> = deps.into_iter().collect(); 861 + result.sort(); 862 + result 863 + } 864 + 865 + /// Extract external references from MLF files that need to be resolved 866 + /// Returns a set of namespace patterns (not full NSIDs) that need to be fetched 867 + fn collect_unresolved_references(project_root: &std::path::Path) -> Result<HashSet<String>, FetchError> { 868 + use mlf_lang::{parser, workspace::Workspace}; 869 + 870 + let mlf_dir = get_mlf_cache_dir(project_root); 871 + let mlf_lexicons_dir = mlf_dir.join("lexicons/mlf"); 872 + 873 + if !mlf_lexicons_dir.exists() { 874 + return Ok(HashSet::new()); 875 + } 876 + 877 + // Build a workspace from all fetched MLF files 878 + let mut workspace = Workspace::new(); 879 + let mut unresolved = HashSet::new(); 880 + 881 + // Recursively find all .mlf files 882 + fn collect_mlf_files(dir: &std::path::Path, files: &mut Vec<std::path::PathBuf>) -> std::io::Result<()> { 883 + if dir.is_dir() { 884 + for entry in std::fs::read_dir(dir)? 
{ 885 + let entry = entry?; 886 + let path = entry.path(); 887 + if path.is_dir() { 888 + collect_mlf_files(&path, files)?; 889 + } else if path.extension().and_then(|s| s.to_str()) == Some("mlf") { 890 + files.push(path); 891 + } 892 + } 893 + } 894 + Ok(()) 895 + } 896 + 897 + let mut mlf_files = Vec::new(); 898 + collect_mlf_files(&mlf_lexicons_dir, &mut mlf_files)?; 899 + 900 + // Parse each MLF file and add to workspace 901 + for mlf_file in mlf_files { 902 + let content = std::fs::read_to_string(&mlf_file)?; 903 + 904 + // Extract namespace from file path 905 + // e.g., ".mlf/lexicons/mlf/place/stream/key.mlf" -> "place.stream.key" 906 + let relative_path = mlf_file.strip_prefix(&mlf_lexicons_dir) 907 + .map_err(|_| FetchError::IoError(std::io::Error::new( 908 + std::io::ErrorKind::Other, 909 + "Failed to compute relative path" 910 + )))?; 911 + 912 + let namespace = relative_path 913 + .with_extension("") 914 + .to_string_lossy() 915 + .replace(std::path::MAIN_SEPARATOR, "."); 916 + 917 + // Parse the lexicon 918 + if let Ok(lexicon) = parser::parse_lexicon(&content) { 919 + let _ = workspace.add_module(namespace, lexicon); 920 + } 921 + } 922 + 923 + // Resolve to find undefined references 924 + if let Err(errors) = workspace.resolve() { 925 + for error in errors.errors { 926 + if let mlf_lang::error::ValidationError::UndefinedReference { name, .. 
} = error { 927 + // Only collect multi-segment NSIDs (external references) 928 + // Single-segment names are likely local typos 929 + if name.contains('.') { 930 + // Convert type reference to namespace pattern 931 + // e.g., "app.bsky.actor.defs.profileViewBasic" -> "app.bsky.actor.*" 932 + // We fetch the whole namespace since we don't know which specific 933 + // lexicon file contains the type definition 934 + let namespace_pattern = extract_namespace_pattern(&name); 935 + unresolved.insert(namespace_pattern); 936 + } 937 + } 938 + } 939 + } 940 + 941 + Ok(unresolved) 942 + } 943 + 944 + /// Extract the namespace pattern from a type reference 945 + /// For "app.bsky.actor.defs.profileViewBasic" returns "app.bsky.actor.*" 946 + /// This handles the common ATProto pattern where defs are in a separate namespace 947 + fn extract_namespace_pattern(type_ref: &str) -> String { 948 + let parts: Vec<&str> = type_ref.split('.').collect(); 949 + 950 + // For references with 3+ segments, use the first 3 segments as the namespace 951 + // e.g., "app.bsky.actor.defs.profileViewBasic" -> "app.bsky.actor.*" 952 + // e.g., "com.atproto.repo.strongRef" -> "com.atproto.repo.*" 953 + if parts.len() >= 3 { 954 + format!("{}.{}.{}.*", parts[0], parts[1], parts[2]) 955 + } else if parts.len() == 2 { 956 + // For 2-segment refs like "place.stream", fetch everything under that authority 957 + format!("{}.*", type_ref) 958 + } else { 959 + // Single segment or empty, just return as-is (shouldn't happen) 960 + type_ref.to_string() 961 + } 962 + } 963 + 964 + /// Optimize a set of NSIDs by collapsing them into the minimal set of fetch patterns 965 + /// For example: ["app.bsky.actor.foo", "app.bsky.actor.bar"] -> ["app.bsky.actor.*"] 966 + /// This function tries multiple grouping strategies to find the most efficient pattern 967 + fn optimize_fetch_patterns(nsids: &HashSet<String>) -> Vec<String> { 968 + use std::collections::BTreeMap; 969 + 970 + if nsids.is_empty() { 971 + return 
Vec::new(); 972 + } 973 + 974 + // Strategy 1: Try grouping by authority (first 2 segments) 975 + // e.g., ["app.bsky.actor.foo", "app.bsky.feed.bar"] -> ["app.bsky.*"] 976 + let mut authority_groups: BTreeMap<String, Vec<String>> = BTreeMap::new(); 977 + 978 + for nsid in nsids { 979 + let parts: Vec<&str> = nsid.split('.').collect(); 980 + if parts.len() >= 2 { 981 + let authority = format!("{}.{}", parts[0], parts[1]); 982 + authority_groups.entry(authority).or_insert_with(Vec::new).push(nsid.clone()); 983 + } 984 + } 985 + 986 + // Strategy 2: Try grouping by namespace prefix (all but last segment) 987 + // e.g., ["app.bsky.actor.foo", "app.bsky.actor.bar"] -> ["app.bsky.actor.*"] 988 + let mut prefix_groups: BTreeMap<String, Vec<String>> = BTreeMap::new(); 989 + 990 + for nsid in nsids { 991 + let parts: Vec<&str> = nsid.split('.').collect(); 992 + if parts.len() >= 3 { 993 + let prefix = parts[..parts.len() - 1].join("."); 994 + prefix_groups.entry(prefix).or_insert_with(Vec::new).push(nsid.clone()); 995 + } 996 + } 997 + 998 + let mut result = Vec::new(); 999 + let mut handled_nsids = HashSet::new(); 1000 + 1001 + // First pass: Apply namespace-level grouping (more specific) 1002 + for (prefix, group) in &prefix_groups { 1003 + if group.len() >= 2 && !handled_nsids.contains(&group[0]) { 1004 + result.push(format!("{}.*", prefix)); 1005 + for nsid in group { 1006 + handled_nsids.insert(nsid.clone()); 1007 + } 1008 + } 1009 + } 1010 + 1011 + // Second pass: For remaining NSIDs, consider authority-level grouping 1012 + // Only use authority wildcard if we have 3+ different namespaces under same authority 1013 + for (authority, group) in &authority_groups { 1014 + let unhandled: Vec<&String> = group.iter() 1015 + .filter(|nsid| !handled_nsids.contains(*nsid)) 1016 + .collect(); 1017 + 1018 + if unhandled.len() >= 3 { 1019 + result.push(format!("{}.*", authority)); 1020 + for nsid in &unhandled { 1021 + handled_nsids.insert((*nsid).clone()); 1022 + } 1023 + } 
1024 + } 1025 + 1026 + // Third pass: Add remaining individual NSIDs 1027 + for nsid in nsids { 1028 + if !handled_nsids.contains(nsid) { 1029 + result.push(nsid.clone()); 1030 + } 1031 + } 1032 + 1033 + // Sort for consistent output 1034 + result.sort(); 1035 + result 591 1036 }
+8 -2
mlf-cli/src/main.rs
··· 63 63 64 64 #[arg(long, help = "Add namespace to dependencies in mlf.toml")] 65 65 save: bool, 66 + 67 + #[arg(long, help = "Update dependencies to latest versions (ignores lockfile)")] 68 + update: bool, 69 + 70 + #[arg(long, help = "Require lockfile and fail if dependencies need updating")] 71 + locked: bool, 66 72 }, 67 73 } 68 74 ··· 134 140 generate::run_all().into_diagnostic() 135 141 } 136 142 }, 137 - Commands::Fetch { nsid, save } => { 138 - fetch::run_fetch(nsid, save).into_diagnostic() 143 + Commands::Fetch { nsid, save, update, locked } => { 144 + fetch::run_fetch(nsid, save, update, locked).into_diagnostic() 139 145 } 140 146 }; 141 147
+33 -5
website/content/docs/cli/02-configuration.md
··· 91 91 "app.bsky", 92 92 "com.atproto" 93 93 ] 94 + 95 + # Enable/disable transitive dependency resolution (default: true) 96 + allow_transitive_deps = true 97 + 98 + # Enable/disable fetch optimization (default: false) 99 + # When true, tries to collapse similar NSIDs into wildcards 100 + optimize_transitive_fetches = false 94 101 ``` 95 102 96 - These dependencies are fetched when you run `mlf fetch` without arguments. 103 + **Options:** 104 + 105 + - `dependencies` - List of NSID patterns to fetch (supports wildcards like `app.bsky.*`) 106 + - `allow_transitive_deps` - Automatically fetch dependencies of dependencies (default: `true`) 107 + - `optimize_transitive_fetches` - Group similar NSIDs into wildcards to reduce fetch count (default: `false`) 108 + 109 + These dependencies are fetched when you run `mlf fetch` without arguments. MLF automatically resolves transitive dependencies unless `allow_transitive_deps` is set to `false`. See the [Fetch Command](../07-fetch/#transitive-dependencies) for more details. 97 110 98 111 ## Commands Using Configuration 99 112 ··· 257 270 ``` 258 271 .mlf/ 259 272 ├── .gitignore # Automatically created 260 - ├── .lexicon-cache.toml # Metadata about fetched lexicons 261 273 └── lexicons/ 262 274 ├── json/ # Original JSON lexicons 263 275 │ ├── app.bsky.actor.profile.json ··· 269 281 270 282 The `.mlf` directory is automatically added to `.gitignore`, so fetched lexicons won't be committed to your repository. 271 283 284 + ## Lockfile 285 + 286 + MLF tracks resolved lexicons in `mlf-lock.toml` at your project root: 287 + 288 + ```toml 289 + version = 1 290 + 291 + [lexicons."app.bsky.actor.profile"] 292 + nsid = "app.bsky.actor.profile" 293 + did = "did:plc:4v4y5r3lwsbtmsxhile2ljac" 294 + checksum = "sha256:abc123..." 295 + dependencies = ["com.atproto.repo.strongRef"] 296 + ``` 297 + 298 + **Always commit `mlf-lock.toml`** to version control to ensure reproducible builds. 
See the [Fetch Command](../07-fetch/#lockfile-mlf-lock-toml) documentation for details. 299 + 272 300 ## Best Practices 273 301 274 - 1. **Commit `mlf.toml`** - Version control your configuration 275 - 2. **Don't commit `.mlf/`** - Let each developer fetch dependencies 302 + 1. **Commit `mlf.toml` and `mlf-lock.toml`** - Version control your configuration and lockfile 303 + 2. **Don't commit `.mlf/`** - Let each developer fetch dependencies independently 276 304 3. **Use semantic namespaces** - Organize lexicons by domain 277 305 4. **Set consistent root** - Keep your source directory as the root for namespace calculation 278 306 5. **Multiple outputs** - Generate both lexicons and code simultaneously 279 - 6. **CI/CD integration** - Run `mlf check` in your CI pipeline 307 + 6. **CI/CD integration** - Run `mlf check` and `mlf fetch --locked` in your CI pipeline 280 308 281 309 ## Override Configuration 282 310
+5 -3
website/content/docs/cli/03-init.md
··· 36 36 ``` 37 37 .mlf/ 38 38 ├── .gitignore # Ignores all files except itself 39 - ├── .lexicon-cache.toml # Metadata about fetched lexicons 40 39 └── lexicons/ 41 40 ├── json/ # Original JSON lexicons 42 41 └── mlf/ # Converted MLF format 43 42 ``` 44 43 45 44 The `.mlf` directory is automatically added to `.gitignore` so fetched lexicons aren't committed to version control. 45 + 46 + When you fetch dependencies, an **mlf-lock.toml** lockfile is created at the project root to track resolved lexicon versions. This lockfile should be committed to version control. 46 47 47 48 ## Interactive Mode 48 49 ··· 197 198 198 199 1. **Always start with init** - It sets up the correct structure 199 200 2. **Use --yes in scripts** - Avoids hanging on prompts 200 - 3. **Commit mlf.toml** - Track your project configuration 201 - 4. **Don't commit .mlf/** - Let each developer fetch dependencies 201 + 3. **Commit mlf.toml and mlf-lock.toml** - Track your project configuration and lockfile 202 + 4. **Don't commit .mlf/** - Let each developer fetch dependencies independently 202 203 5. **Customize after init** - Edit `mlf.toml` to add outputs and dependencies 204 + 6. **Use --locked in CI** - Run `mlf fetch --locked` for reproducible builds
+152 -49
website/content/docs/cli/07-fetch.md
··· 9 9 ## Usage 10 10 11 11 ```bash 12 - # Fetch all dependencies from mlf.toml 12 + # Fetch all dependencies from mlf.toml (use lockfile if present) 13 13 mlf fetch 14 + 15 + # Fetch and update all dependencies to latest versions 16 + mlf fetch --update 17 + 18 + # Strict mode: fetch from lockfile only (for CI/CD) 19 + mlf fetch --locked 14 20 15 21 # Fetch a specific lexicon 16 22 mlf fetch <NSID> ··· 30 36 31 37 **Options:** 32 38 - `--save` - Add the NSID/pattern to dependencies in `mlf.toml` 39 + - `--update` - Update dependencies to latest versions (ignores lockfile) 40 + - `--locked` - Require lockfile and fail if dependencies need updating (for CI/CD) 41 + 42 + ## Lockfile (`mlf-lock.toml`) 43 + 44 + MLF uses a lockfile to ensure reproducible builds, similar to `package-lock.json` (npm) or `Cargo.lock` (Rust). 45 + 46 + ### Lockfile Format 47 + 48 + ```toml 49 + version = 1 50 + 51 + [lexicons."place.stream.richtext.facet"] 52 + nsid = "place.stream.richtext.facet" 53 + did = "did:web:stream.place" 54 + checksum = "sha256:72c8986132821c7c6e3bd30d697f017861d77867b358e3c7850c19baef0a50d5" 55 + dependencies = ["app.bsky.richtext.facet#byteSlice"] 56 + 57 + [lexicons."app.bsky.richtext.facet"] 58 + nsid = "app.bsky.richtext.facet" 59 + did = "did:plc:4v4y5r3lwsbtmsxhile2ljac" 60 + checksum = "sha256:db59d218c482774e617bb5d90d19ab75e2557f8cdebafe798be01b37d957d336" 61 + ``` 62 + 63 + ### Fetch Modes 64 + 65 + | Mode | Command | Behavior | 66 + |------|---------|----------| 67 + | **Fresh** | `mlf fetch` | No lockfile exists, performs full DNS lookup and fetch, creates lockfile | 68 + | **Lockfile** | `mlf fetch` | Uses existing lockfile to guide fetch, updates lockfile if dependencies change | 69 + | **Update** | `mlf fetch --update` | Ignores lockfile, refetches everything, updates lockfile with latest versions | 70 + | **Locked** | `mlf fetch --locked` | Strict CI mode, uses only lockfile, verifies checksums, fails if no lockfile exists | 71 + 72 + ### When 
to Use Each Mode 73 + 74 + - **Development**: Use `mlf fetch` (default) - respects lockfile for consistency 75 + - **Update deps**: Use `mlf fetch --update` - gets latest versions 76 + - **CI/Production**: Use `mlf fetch --locked` - ensures reproducible builds 77 + 78 + ## Transitive Dependencies 79 + 80 + MLF automatically resolves and fetches transitive dependencies (dependencies of dependencies). 81 + 82 + ### Example 83 + 84 + If `place.stream.richtext.facet` depends on `app.bsky.richtext.facet#byteSlice`, MLF will: 85 + 1. Fetch `place.stream.richtext.*` (your explicit dependency) 86 + 2. Parse the lexicons to find external references 87 + 3. Automatically fetch `app.bsky.richtext.*` (transitive dependency) 88 + 4. Record both in `mlf-lock.toml` 89 + 90 + ### Configuration 91 + 92 + Control transitive dependency resolution in `mlf.toml`: 93 + 94 + ```toml 95 + [dependencies] 96 + dependencies = ["place.stream.*"] 97 + 98 + # Enable/disable transitive dependency resolution (default: true) 99 + allow_transitive_deps = true 100 + 101 + # Enable/disable fetch optimization (default: false) 102 + # When true, tries to collapse similar NSIDs into wildcards 103 + optimize_transitive_fetches = false 104 + ``` 33 105 34 106 ## How It Works 35 107 ··· 39 111 2. **DID Resolution** - Resolves the DID to a PDS endpoint 40 112 3. **Fetch Records** - Queries `com.atproto.repo.listRecords` for lexicon schemas 41 113 4. **Save & Convert** - Saves JSON and converts to MLF format 114 + 5. **Update Lockfile** - Records NSIDs, DIDs, checksums, and dependencies 42 115 43 116 ## Examples 44 117 ··· 59 132 60 133 **Output:** 61 134 ``` 62 - Fetching 2 dependencies... 135 + Fetching 2 dependencies... 
(mode: fresh, transitive deps: enabled) 63 136 64 137 Fetching: com.example.forum.* 65 138 Fetching lexicons for pattern: com.example.forum.* ··· 69 142 Processing: com.example.forum.post 70 143 → Saved JSON to .mlf/lexicons/json/com/example/forum/post.json 71 144 → Converted to MLF at .mlf/lexicons/mlf/com/example/forum/post.mlf 72 - Processing: com.example.forum.thread 73 - → Saved JSON to .mlf/lexicons/json/com/example/forum/thread.json 74 - → Converted to MLF at .mlf/lexicons/mlf/com/example/forum/thread.mlf 75 145 ✓ Successfully fetched 2 lexicon(s) for com.example.forum.* 76 146 77 - Fetching: com.example.social.* 78 - ... 147 + → Updated mlf-lock.toml 79 148 80 149 ✓ Successfully fetched all 2 dependencies 81 150 ``` 82 151 152 + ### Update to Latest Versions 153 + 154 + ```bash 155 + mlf fetch --update 156 + ``` 157 + 158 + This ignores the lockfile and fetches the latest versions of all dependencies. 159 + 160 + ### CI/CD with Locked Mode 161 + 162 + ```bash 163 + mlf fetch --locked 164 + ``` 165 + 166 + **Output:** 167 + ``` 168 + Using locked dependencies from mlf-lock.toml 169 + Fetching 2 lexicon(s) from lockfile... 170 + 171 + Refetching: place.stream.richtext.facet 172 + → Using PDS: https://stream.place 173 + → Saved JSON (checksum verified) 174 + → Converted to MLF 175 + 176 + ✓ Successfully fetched all 2 lexicons 177 + ``` 178 + 179 + If no lockfile exists: 180 + ``` 181 + ✗ No lockfile found. 
Run `mlf fetch` first to create mlf-lock.toml 182 + ``` 183 + 83 184 ### Fetch Specific Lexicon 84 185 85 186 ```bash ··· 114 215 ``` 115 216 .mlf/ 116 217 ├── .gitignore # Auto-generated 117 - ├── .lexicon-cache.toml # Cache metadata 118 218 └── lexicons/ 119 219 ├── json/ # Original JSON lexicons 120 220 │ ├── com/ ··· 142 242 └── post.mlf 143 243 ``` 144 244 145 - ### Cache File 146 - 147 - The `.lexicon-cache.toml` tracks what's been fetched: 148 - 149 - ```toml 150 - [[lexicons."com.example.forum.post"]] 151 - nsid = "com.example.forum.post" 152 - fetched_at = "2024-01-15T10:30:00Z" 153 - did = "did:web:example.com" 154 - hash = "abc123..." 155 - ``` 245 + **Note:** The lockfile (`mlf-lock.toml`) lives at the project root, sibling to `mlf.toml`. 156 246 157 247 ## DNS Resolution 158 248 ··· 222 312 ... 223 313 ``` 224 314 225 - ## Re-fetching 226 - 227 - If a lexicon is already cached, fetch skips it: 228 - 229 - ```bash 230 - $ mlf fetch com.example.forum.post 231 - Lexicon 'com.example.forum.post' is already cached. Skipping fetch. 232 - (Use --force to re-fetch) 233 - ``` 234 - 235 - To re-fetch: 236 - 237 - ```bash 238 - mlf fetch com.example.forum.post --force # Not yet implemented 239 - ``` 240 - 241 315 ## Error Handling 242 316 243 317 ### DNS Errors ··· 276 350 ### Invalid NSID Format 277 351 278 352 ``` 279 - ✗ NSID must have at least 3 segments or use wildcard (e.g., 'com.example.forum.post' or 'com.example.forum.*'): com.example 353 + ✗ NSID must have at least 2 segments or use wildcard: com 280 354 ``` 281 355 282 356 **Solution:** 283 357 - Use a specific NSID: `com.example.forum.post` 284 358 - Or use a wildcard: `com.example.forum.*` 285 359 286 - ## Best Practices 360 + ### Checksum Mismatch (--locked mode) 287 361 288 - 1. **Fetch before work** - Always fetch dependencies before coding 289 - 2. **Use --save** - Keep `mlf.toml` up to date with dependencies 290 - 3. **Don't commit `.mlf/`** - Let each developer fetch independently 291 - 4. 
**Check DNS** - Verify TXT records before fetching 292 - 5. **Version dependencies** - Consider tracking lexicon versions (future feature) 362 + ``` 363 + ✗ Checksum mismatch for place.stream.richtext.facet: expected sha256:abc123, got sha256:def456 364 + ``` 365 + 366 + **Causes:** 367 + - Lexicon was updated on the server 368 + - Lock file is out of date 369 + 370 + **Solution:** 371 + ```bash 372 + mlf fetch --update # Update lockfile with new checksums 373 + ``` 374 + 375 + ## Best Practices 293 376 377 + 1. **Commit lockfile** - Always commit `mlf-lock.toml` to version control 378 + 2. **Use --locked in CI** - Ensures reproducible builds in CI/CD pipelines 379 + 3. **Fetch before work** - Always fetch dependencies before coding 380 + 4. **Use --save** - Keep `mlf.toml` up to date with dependencies 381 + 5. **Don't commit `.mlf/`** - Let each developer fetch independently 382 + 6. **Check DNS** - Verify TXT records before fetching 383 + 7. **Update explicitly** - Use `mlf fetch --update` when you want latest versions 294 384 295 385 ## Comparison with npm/cargo 296 386 297 - The fetch command is similar to package managers: 387 + The fetch command follows patterns from popular package managers: 298 388 299 - | Command | npm | cargo | mlf | 300 - |---------|-----|-------|-----| 301 - | Install deps | `npm install` | `cargo fetch` | `mlf fetch` | 302 - | Add dep | `npm install pkg --save` | `cargo add pkg` | `mlf fetch ns --save` | 389 + | Aspect | npm | Cargo | MLF | 390 + |--------|-----|-------|-----| 391 + | Install deps | `npm install` | `cargo build` | `mlf fetch` | 392 + | Update deps | `npm update` | `cargo update` | `mlf fetch --update` | 393 + | Strict mode | `npm ci` | `cargo build --locked` | `mlf fetch --locked` | 394 + | Add dep | `npm install pkg` | `cargo add pkg` | `mlf fetch ns --save` | 303 395 | Config file | `package.json` | `Cargo.toml` | `mlf.toml` | 396 + | Lock file | `package-lock.json` | `Cargo.lock` | `mlf-lock.toml` | 304 397 | Cache | 
`node_modules/` | `~/.cargo/` | `.mlf/` | 305 398 306 399 ## Troubleshooting ··· 321 414 - ✓ `com.example.forum.post` (specific lexicon) 322 415 - ✓ `com.example.forum.*` (wildcard) 323 416 - ✓ `app.bsky.feed.*` (real-world wildcard) 324 - - ✗ `com.example` (must be specific or use wildcard) 417 + - ✗ `com` (must have at least 2 segments) 325 418 326 419 ### Permission Errors 327 420 328 - Ensure you have write permissions for the project directory to create `.mlf/`. 421 + Ensure you have write permissions for the project directory to create `.mlf/` and `mlf-lock.toml`. 422 + 423 + ### Conflicting Flags 424 + 425 + ``` 426 + ✗ Cannot use --update and --locked together 427 + ``` 428 + 429 + Choose one mode: 430 + - Use `--update` to get latest versions 431 + - Use `--locked` for strict reproducible builds