A human-friendly DSL for ATProto Lexicons

Factor out mlf-lexicon-fetcher

+2028 -463
+17 -3
Cargo.lock
··· 1290 1290 "chrono", 1291 1291 "clap", 1292 1292 "glob", 1293 - "hickory-resolver", 1294 1293 "miette", 1295 1294 "mlf-codegen", 1296 1295 "mlf-codegen-go", ··· 1298 1297 "mlf-codegen-typescript", 1299 1298 "mlf-diagnostics", 1300 1299 "mlf-lang", 1300 + "mlf-lexicon-fetcher", 1301 1301 "mlf-validation", 1302 1302 "reqwest", 1303 1303 "serde", 1304 1304 "serde_json", 1305 1305 "sha2", 1306 1306 "thiserror 2.0.17", 1307 + "tokio", 1307 1308 "toml", 1308 1309 ] 1309 1310 ··· 1358 1359 "mlf-codegen", 1359 1360 "mlf-diagnostics", 1360 1361 "mlf-lang", 1362 + "mlf-lexicon-fetcher", 1361 1363 "serde", 1362 1364 "serde_json", 1365 + "tokio", 1363 1366 "toml", 1364 1367 ] 1365 1368 ··· 1373 1376 "serde", 1374 1377 "serde_json", 1375 1378 "toml", 1379 + ] 1380 + 1381 + [[package]] 1382 + name = "mlf-lexicon-fetcher" 1383 + version = "0.1.0" 1384 + dependencies = [ 1385 + "async-trait", 1386 + "hickory-resolver", 1387 + "reqwest", 1388 + "serde", 1389 + "serde_json", 1390 + "thiserror 2.0.17", 1391 + "tokio", 1376 1392 ] 1377 1393 1378 1394 [[package]] ··· 1801 1817 "base64", 1802 1818 "bytes", 1803 1819 "encoding_rs", 1804 - "futures-channel", 1805 1820 "futures-core", 1806 - "futures-util", 1807 1821 "h2", 1808 1822 "http", 1809 1823 "http-body",
+1
Cargo.toml
··· 7 7 "mlf-cli", 8 8 "mlf-codegen", 9 9 "mlf-diagnostics", 10 + "mlf-lexicon-fetcher", 10 11 "mlf-lang", 11 12 "mlf-lsp", 12 13 "mlf-validation", "mlf-wasm",
+12 -6
justfile
··· 5 5 default: test 6 6 7 7 # Run all tests (excluding problematic packages) 8 - test: test-lang test-codegen test-diagnostics test-validation 8 + test: test-lang test-codegen test-diagnostics test-lexicon-fetcher test-validation 9 9 10 10 # Run only language tests (mlf-lang crate) 11 11 test-lang: ··· 21 21 test-diagnostics: 22 22 @echo "\nRunning diagnostics integration tests..." 23 23 cargo test -p mlf-integration-tests --test diagnostics_integration -- --nocapture 24 + 25 + # Run lexicon fetcher tests 26 + test-lexicon-fetcher: 27 + @echo "\nRunning lexicon fetcher tests..." 28 + cargo test -p mlf-lexicon-fetcher -- --nocapture 24 29 25 30 # Run validation tests 26 31 test-validation: ··· 78 83 # Show test statistics 79 84 test-stats: 80 85 @echo "Test Statistics:" 81 - @echo " Lang tests: 21 tests in mlf-lang/tests/lang/" 82 - @echo " Codegen tests: 10 tests in tests/codegen/lexicon/" 83 - @echo " Diagnostics tests: 1 test in tests/diagnostics/" 84 - @echo " Validation tests: 12 tests in mlf-validation" 86 + @echo " Lang tests: 21 tests in mlf-lang/tests/lang/" 87 + @echo " Codegen tests: 10 tests in tests/codegen/lexicon/" 88 + @echo " Diagnostics tests: 1 test in tests/diagnostics/" 89 + @echo " Lexicon fetcher tests: 33 tests in mlf-lexicon-fetcher/" 90 + @echo " Validation tests: 12 tests in mlf-validation" 85 91 @echo "" 86 - @echo "Total integration tests: 44" 92 + @echo "Total integration tests: 77" 87 93 88 94 # List all test directories 89 95 test-list:
+3 -2
mlf-cli/Cargo.toml
··· 13 13 mlf-validation = { path = "../mlf-validation" } 14 14 mlf-codegen = { path = "../mlf-codegen" } 15 15 mlf-diagnostics = { path = "../mlf-diagnostics" } 16 + mlf-lexicon-fetcher = { path = "../mlf-lexicon-fetcher" } 16 17 clap = { version = "4.5.48", features = ["derive"] } 17 18 miette = { version = "7", features = ["fancy"] } 18 19 thiserror = "2" ··· 20 21 serde_json = "1" 21 22 glob = "0.3" 22 23 toml = "0.8" 23 - reqwest = { version = "0.12", features = ["blocking", "json"] } 24 + tokio = { version = "1", features = ["rt-multi-thread", "macros"] } 25 + reqwest = { version = "0.12", features = ["json"] } 24 26 chrono = { version = "0.4", features = ["serde"] } 25 - hickory-resolver = "0.24" 26 27 sha2 = "0.10" 27 28 28 29 # Optional code generator plugins
+107 -445
mlf-cli/src/fetch.rs
··· 1 1 use crate::config::{find_project_root, get_mlf_cache_dir, init_mlf_cache, ConfigError, MlfConfig, LockFile}; 2 - use hickory_resolver::config::*; 3 - use hickory_resolver::Resolver; 2 + use mlf_lexicon_fetcher::{optimize_fetch_patterns, ProductionLexiconFetcher}; 4 3 use miette::Diagnostic; 5 - use serde::Deserialize; 6 4 use sha2::{Digest, Sha256}; 7 5 use std::collections::HashSet; 8 6 use thiserror::Error; ··· 17 15 #[diagnostic(code(mlf::fetch::init_failed))] 18 16 InitFailed(#[source] std::io::Error), 19 17 20 - #[error("DNS lookup failed: {0}")] 21 - #[diagnostic(code(mlf::fetch::dns_error))] 22 - DnsError(String), 23 - 24 - #[error("Failed to parse DID from TXT record: {0}")] 25 - #[diagnostic(code(mlf::fetch::did_parse_error))] 26 - DidParseError(String), 27 - 28 18 #[error("Failed to fetch lexicon from ATProto repo: {0}")] 29 19 #[diagnostic(code(mlf::fetch::http_error))] 30 20 HttpError(String), ··· 47 37 } 48 38 49 39 50 - #[derive(Debug, Deserialize)] 51 - struct AtProtoRecord { 52 - uri: String, 53 - value: serde_json::Value, 54 - } 55 40 56 41 /// Main entry point for fetch command 57 - pub fn run_fetch(nsid: Option<String>, save: bool, update: bool, locked: bool) -> Result<(), FetchError> { 42 + pub async fn run_fetch(nsid: Option<String>, save: bool, update: bool, locked: bool) -> Result<(), FetchError> { 58 43 // Validate flags 59 44 if update && locked { 60 45 return Err(FetchError::HttpError( ··· 76 61 let config_path = project_root.join("mlf.toml"); 77 62 let config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?; 78 63 79 - fetch_lexicon_with_lock(&namespace, &project_root, &mut lockfile)?; 64 + fetch_lexicon_with_lock(&namespace, &project_root, &mut lockfile).await?; 80 65 81 66 // Handle transitive dependencies if enabled 82 67 if config.dependencies.allow_transitive_deps { ··· 85 70 &project_root, 86 71 &mut lockfile, 87 72 config.dependencies.optimize_transitive_fetches 88 - )?; 73 + ).await?; 89 74 } 90 75 91 76 // Save lockfile ··· 101 86 } 102 87 None => { 103 88 // Fetch all dependencies from mlf.toml 104 - fetch_all_dependencies(&project_root, update, locked) 89 + fetch_all_dependencies(&project_root, update, locked).await 105 90 } 106 91 } 107 92 } ··· 132 117 } 133 118 } 134 119 135 - fn fetch_all_dependencies(project_root: &std::path::Path, update: bool, locked: bool) -> Result<(), FetchError> { 120 + async fn fetch_all_dependencies(project_root: &std::path::Path, update: bool, locked: bool) -> Result<(), FetchError> { 136 121 // Load mlf.toml 137 122 let config_path = project_root.join("mlf.toml"); 138 123 let config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?; ··· 160 145 // In locked mode, we use the lockfile and verify nothing needs updating 161 146 // For now, we'll just use the lockfile - verification can be enhanced later 162 147 println!("Using locked dependencies from mlf-lock.toml"); 163 - return fetch_from_lockfile(project_root, &existing_lockfile); 148 + return fetch_from_lockfile(project_root, &existing_lockfile).await; 164 149 } 165 150 166 151 // Determine fetch mode ··· 192 177 // Fetch initial dependencies 193 178 for dep in &config.dependencies.dependencies { 194 179 println!("\nFetching: {}", dep); 195 - match fetch_lexicon_with_lock(dep, project_root, &mut lockfile) { 180 + match fetch_lexicon_with_lock(dep, project_root, &mut lockfile).await { 196 181 Ok(()) => { 197 182 success_count += 1; 198 183 fetched_nsids.insert(dep.clone()); ··· 205 190 206 191 // If transitive dependencies are enabled, fetch them 207 192 if allow_transitive { 208 - fetch_transitive_dependencies(&project_root, &mut lockfile, config.dependencies.optimize_transitive_fetches)?; 193 + fetch_transitive_dependencies(&project_root, &mut lockfile, config.dependencies.optimize_transitive_fetches).await?; 209 194 } 210 195 211 196 // Save the lockfile ··· 232 217 } 233 218 234 219 /// Fetch transitive dependencies by iteratively resolving unresolved references 235 - fn fetch_transitive_dependencies( 220 + async fn fetch_transitive_dependencies( 236 221 project_root: &std::path::Path, 237 222 lockfile: &mut LockFile, 238 223 optimize_fetches: bool ··· 287 272 println!("\nFetching transitive dependency: {}", pattern); 288 273 fetched_nsids.insert(pattern.clone()); 289 274 290 - match fetch_lexicon_with_lock(&pattern, project_root, lockfile) { 275 + match fetch_lexicon_with_lock(&pattern, project_root, lockfile).await { 291 276 Ok(()) => {} 292 277 Err(e) => { 293 278 eprintln!(" Warning: Failed to fetch {}: {}", pattern, e); ··· 320 305 println!(" Fetching: {}", nsid); 321 306 fetched_nsids.insert(nsid.clone()); 322 307 323 - match fetch_lexicon_with_lock(&nsid, project_root, lockfile) { 308 + match fetch_lexicon_with_lock(&nsid, project_root, lockfile).await { 324 309 Ok(()) => {} 325 310 Err(e) => { 326 311 eprintln!(" Warning: Failed to fetch {}: {}", nsid, e); ··· 339 324 println!("\nFetching transitive dependency: {}", nsid); 340 325 fetched_nsids.insert(nsid.clone()); 341 326 342 - match fetch_lexicon_with_lock(nsid, project_root, lockfile) { 327 + match fetch_lexicon_with_lock(nsid, project_root, lockfile).await { 343 328 Ok(()) => {} 344 329 Err(e) => { 345 330 // Don't fail the entire fetch for transitive deps ··· 355 340 356 341 /// Fetch dependencies using the lockfile 357 342 /// This refetches each lexicon from its recorded DID and verifies the checksum 358 - fn fetch_from_lockfile(project_root: &std::path::Path, lockfile: &LockFile) -> Result<(), FetchError> { 343 + async fn fetch_from_lockfile(project_root: &std::path::Path, lockfile: &LockFile) -> Result<(), FetchError> { 359 344 if lockfile.lexicons.is_empty() { 360 345 println!("Lockfile is empty"); 361 346 return Ok(()); ··· 371 356 println!("\nRefetching: {}", nsid); 372 357 373 358 // Fetch the lexicon using the DID from lockfile 374 - match fetch_specific_lexicon(nsid, &locked.did, &locked.checksum, project_root) { 359 + match fetch_specific_lexicon(nsid, &locked.did, &locked.checksum, project_root).await { 375 360 Ok(()) => { 376 361 success_count += 1; 377 362 } ··· 401 386 } 402 387 403 388 /// Fetch a specific lexicon by NSID from a known DID, verifying checksum 404 - fn fetch_specific_lexicon( 389 + async fn fetch_specific_lexicon( 405 390 nsid: &str, 406 391 did: &str, 407 392 expected_checksum: &str, ··· 411 396 init_mlf_cache(project_root).map_err(FetchError::InitFailed)?; 412 397 let mlf_dir = get_mlf_cache_dir(project_root); 413 398 414 - // Fetch records from the DID 415 - let records = fetch_lexicon_records(did)?; 399 + // Create fetcher and fetch from known DID (bypassing DNS) 400 + let fetcher = ProductionLexiconFetcher::production() 401 + .await 402 + .map_err(|e| FetchError::HttpError(format!("Failed to create fetcher: {}", e)))?; 416 403 417 - // Find the specific NSID 418 - for record in records { 419 - let record_nsid = extract_nsid_from_record(&record)?; 404 + let result = fetcher 405 + .fetch_from_did_with_metadata(did, nsid) 406 + .await 407 + .map_err(|e| FetchError::HttpError(format!("Failed to fetch from DID: {}", e)))?; 420 408 421 - if record_nsid == nsid { 422 - // Found it! Process and verify checksum 423 - let json_str = serde_json::to_string_pretty(&record.value)?; 424 - let hash = calculate_hash(&json_str); 409 + if result.lexicons.is_empty() { 410 + return Err(FetchError::HttpError(format!( 411 + "Lexicon {} not found in repo {}", 412 + nsid, did 413 + ))); 414 + } 425 415 426 - if hash != expected_checksum { 427 - return Err(FetchError::HttpError(format!( 428 - "Checksum mismatch for {}: expected {}, got {}", 429 - nsid, expected_checksum, hash 430 - ))); 431 - } 416 + // We should only get one lexicon for an exact NSID match 417 + let fetched = &result.lexicons[0]; 432 418 433 - // Save JSON 434 - let mut json_path = mlf_dir.join("lexicons/json"); 435 - for segment in nsid.split('.') { 436 - json_path.push(segment); 437 - } 438 - json_path.set_extension("json"); 419 + if fetched.nsid != nsid { 420 + return Err(FetchError::HttpError(format!( 421 + "Expected lexicon {}, but got {}", 422 + nsid, fetched.nsid 423 + ))); 424 + } 425 + 426 + // Verify checksum 427 + let json_str = serde_json::to_string_pretty(&fetched.lexicon)?; 428 + let hash = calculate_hash(&json_str); 439 429 440 - if let Some(parent) = json_path.parent() { 441 - std::fs::create_dir_all(parent)?; 442 - } 443 - std::fs::write(&json_path, &json_str)?; 444 - println!(" → Saved JSON (checksum verified)"); 430 + if hash != expected_checksum { 431 + return Err(FetchError::HttpError(format!( 432 + "Checksum mismatch for {}: expected {}, got {}", 433 + nsid, expected_checksum, hash 434 + ))); 435 + } 445 436 446 - // Convert to MLF 447 - let mlf_content = crate::generate::mlf::generate_mlf_from_json(&record.value) 448 - .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?; 437 + // Save JSON 438 + let mut json_path = mlf_dir.join("lexicons/json"); 439 + for segment in nsid.split('.') { 440 + json_path.push(segment); 441 + } 442 + json_path.set_extension("json"); 449 443 450 - let mut mlf_path = mlf_dir.join("lexicons/mlf"); 451 - for segment in nsid.split('.') { 452 - mlf_path.push(segment); 453 - } 454 - mlf_path.set_extension("mlf"); 444 + if let Some(parent) = json_path.parent() { 445 + std::fs::create_dir_all(parent)?; 446 + } 447 + std::fs::write(&json_path, &json_str)?; 448 + println!(" → Saved JSON (checksum verified)"); 455 449 456 - if let Some(parent) = mlf_path.parent() { 457 - std::fs::create_dir_all(parent)?; 458 - } 459 - std::fs::write(&mlf_path, mlf_content)?; 460 - println!(" → Converted to MLF"); 450 + // Convert to MLF 451 + let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon) 452 + .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?; 461 453 462 - return Ok(()); 463 - } 454 + let mut mlf_path = mlf_dir.join("lexicons/mlf"); 455 + for segment in nsid.split('.') { 456 + mlf_path.push(segment); 464 457 } 458 + mlf_path.set_extension("mlf"); 465 459 466 - Err(FetchError::HttpError(format!( 467 - "Lexicon {} not found in repo {}", 468 - nsid, did 469 - ))) 460 + if let Some(parent) = mlf_path.parent() { 461 + std::fs::create_dir_all(parent)?; 462 + } 463 + std::fs::write(&mlf_path, mlf_content)?; 464 + println!(" → Converted to MLF"); 465 + 466 + Ok(()) 470 467 } 471 468 472 469 fn save_dependency(project_root: &std::path::Path, nsid: &str) -> Result<(), FetchError> { ··· 485 482 Ok(()) 486 483 } 487 484 488 - pub fn fetch_lexicon(nsid: &str, project_root: &std::path::Path) -> Result<(), FetchError> { 489 - let mut lockfile = LockFile::new(); 490 - fetch_lexicon_with_lock(nsid, project_root, &mut lockfile) 491 - } 492 - 493 - fn fetch_lexicon_with_lock(nsid: &str, project_root: &std::path::Path, lockfile: &mut LockFile) -> Result<(), FetchError> { 485 + async fn fetch_lexicon_with_lock(nsid: &str, project_root: &std::path::Path, lockfile: &mut LockFile) -> Result<(), FetchError> { 494 486 // Initialize .mlf directory 495 487 init_mlf_cache(project_root).map_err(FetchError::InitFailed)?; 496 - 497 488 let mlf_dir = get_mlf_cache_dir(&project_root); 498 489 499 - // Validate NSID format: must be specific (3+ segments) or use wildcard 490 + // Validate NSID format 500 491 validate_nsid_format(nsid)?; 501 492 502 - // Check if it's a wildcard pattern 503 - let is_wildcard = nsid.ends_with(".*"); 504 - let nsid_pattern = if is_wildcard { 505 - nsid.strip_suffix(".*").unwrap() 506 - } else { 507 - nsid 508 - }; 509 - 510 - // Extract authority and name segments from NSID 511 - // For "app.bsky.actor.profile", authority is "app.bsky", name is "actor.profile" 512 - // For DNS lookup, we need "_lexicon.actor.bsky.app" 513 - let (authority, name_segments) = extract_authority_and_name(nsid_pattern)?; 514 493 println!("Fetching lexicons for pattern: {}", nsid); 515 494 516 - // Step 1: DNS TXT lookup 517 - let did = resolve_lexicon_did(&authority, &name_segments)?; 518 - println!(" → Resolved DID: {}", did); 495 + // Create the lexicon fetcher (encapsulates all DNS and HTTP logic) 496 + let fetcher = ProductionLexiconFetcher::production() 497 + .await 498 + .map_err(|e| FetchError::HttpError(format!("Failed to create fetcher: {}", e)))?; 519 499 520 - // Step 2: Query ATProto repo for lexicon schemas 521 - let records = fetch_lexicon_records(&did)?; 522 - println!(" → Found {} lexicon record(s)", records.len()); 500 + // Fetch lexicons with metadata 501 + let result = fetcher 502 + .fetch_with_metadata(nsid) 503 + .await 504 + .map_err(|e| FetchError::HttpError(format!("Failed to fetch: {}", e)))?; 523 505 524 - if records.is_empty() { 506 + if result.lexicons.is_empty() { 525 507 return Err(FetchError::HttpError(format!( 526 - "No lexicon records found for {}", 508 + "No lexicons matched pattern: {}", 527 509 nsid 528 510 ))); 529 511 } 530 512 531 - let mut processed_count = 0; 532 - 533 - // Step 3: Process each record 534 - for record in records { 535 - // Extract NSID from record URI or value 536 - let record_nsid = extract_nsid_from_record(&record)?; 537 - 538 - // Match against pattern 539 - let matches = if is_wildcard { 540 - // Wildcard: match all records starting with the pattern 541 - // For "app.bsky.actor.*", nsid_pattern is "app.bsky.actor" 542 - // Should match "app.bsky.actor.defs", "app.bsky.actor.profile", etc. 543 - let starts_with_pattern = record_nsid.starts_with(nsid_pattern); 544 - let has_more_segments = record_nsid.len() > nsid_pattern.len(); 545 - let is_direct_child = if starts_with_pattern && has_more_segments { 546 - // Check if the next character after the pattern is a dot 547 - record_nsid.chars().nth(nsid_pattern.len()) == Some('.') 548 - } else { 549 - false 550 - }; 551 - 552 - starts_with_pattern && has_more_segments && is_direct_child 553 - } else { 554 - // Specific: exact match only 555 - record_nsid == nsid 556 - }; 513 + println!(" → Found {} lexicon record(s)", result.lexicons.len()); 557 514 558 - if !matches { 559 - continue; 560 - } 515 + // Process each fetched lexicon 516 + for fetched in &result.lexicons { 517 + println!(" Processing: {}", fetched.nsid); 561 518 562 - println!(" Processing: {}", record_nsid); 563 - processed_count += 1; 564 - 565 - // Save JSON file with directory structure 566 - // e.g., "place.stream.key" -> "place/stream/key.json" 567 - let json_str = serde_json::to_string_pretty(&record.value)?; 519 + // Save JSON file 520 + let json_str = serde_json::to_string_pretty(&fetched.lexicon)?; 568 521 let mut json_path = mlf_dir.join("lexicons/json"); 569 - for segment in record_nsid.split('.') { 522 + for segment in fetched.nsid.split('.') { 570 523 json_path.push(segment); 571 524 } 572 525 json_path.set_extension("json"); 573 526 574 - // Create parent directories 575 527 if let Some(parent) = json_path.parent() { 576 528 std::fs::create_dir_all(parent)?; 577 529 } 578 - 579 530 std::fs::write(&json_path, &json_str)?; 580 531 println!(" → Saved JSON to {}", json_path.display()); 581 532 582 533 // Convert to MLF 583 - let mlf_content = crate::generate::mlf::generate_mlf_from_json(&record.value) 534 + let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon) 584 535 .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?; 585 536 586 - // Save MLF file with directory structure 587 - // e.g., "place.stream.key" -> "place/stream/key.mlf" 537 + // Save MLF file 588 538 let mut mlf_path = mlf_dir.join("lexicons/mlf"); 589 - for segment in record_nsid.split('.') { 539 + for segment in fetched.nsid.split('.') { 590 540 mlf_path.push(segment); 591 541 } 592 542 mlf_path.set_extension("mlf"); 593 543 594 - // Create parent directories 595 544 if let Some(parent) = mlf_path.parent() { 596 545 std::fs::create_dir_all(parent)?; 597 546 } 598 - 599 547 std::fs::write(&mlf_path, mlf_content)?; 600 548 println!(" → Converted to MLF at {}", mlf_path.display()); 601 549 602 - // Calculate hash of JSON content 550 + // Calculate hash and extract dependencies for lockfile 603 551 let hash = calculate_hash(&json_str); 552 + let dependencies = extract_dependencies_from_json(&fetched.lexicon); 604 553 605 - // Extract dependencies from JSON 606 - let dependencies = extract_dependencies_from_json(&record.value); 607 - 608 - // Update lockfile 609 - lockfile.add_lexicon(record_nsid.clone(), did.clone(), hash.clone(), dependencies); 610 - } 611 - 612 - if processed_count == 0 { 613 - return Err(FetchError::HttpError(format!( 614 - "No lexicons matched pattern: {}", 615 - nsid 616 - ))); 554 + // Update lockfile with DID from fetcher metadata 555 + lockfile.add_lexicon(fetched.nsid.clone(), fetched.did.clone(), hash, dependencies); 617 556 } 618 557 619 - println!("✓ Successfully fetched {} lexicon(s) for {}", processed_count, nsid); 558 + println!("✓ Successfully fetched {} lexicon(s) for {}", result.lexicons.len(), nsid); 620 559 Ok(()) 621 560 } 622 561 623 562 fn validate_nsid_format(nsid: &str) -> Result<(), FetchError> { 624 - // Remove wildcard suffix for validation 625 - let nsid_base = nsid.strip_suffix(".*").unwrap_or(nsid); 563 + // Remove wildcard suffix for validation (both .* and ._) 564 + let nsid_base = nsid 565 + .strip_suffix(".*") 566 + .or_else(|| nsid.strip_suffix("._")) 567 + .unwrap_or(nsid); 626 568 627 569 let parts: Vec<&str> = nsid_base.split('.').collect(); 628 570 629 571 // NSID must have at least 2 segments (authority) 630 - // e.g., "place.stream", "place.stream.key", or "place.stream.*" 572 + // e.g., "place.stream", "place.stream.key", "place.stream.*", or "place.stream._" 631 573 if parts.len() < 2 { 632 574 return Err(FetchError::InvalidNsid(format!( 633 575 "NSID must have at least 2 segments (e.g., 'place.stream' or 'com.atproto.repo.strongRef'): {}", ··· 638 580 Ok(()) 639 581 } 640 582 641 - fn extract_authority_and_name(nsid_pattern: &str) -> Result<(String, String), FetchError> { 642 - // NSID format: authority.name(.name)* 643 - // For "place.stream.key", authority is "place.stream" (first 2), name is "key" 644 - // For "app.bsky.actor.profile", authority is "app.bsky" (first 2), name is "actor.profile" 645 - let parts: Vec<&str> = nsid_pattern.split('.').collect(); 646 - 647 - if parts.len() < 2 { 648 - return Err(FetchError::InvalidNsid(format!( 649 - "NSID must have at least 2 segments: {}", 650 - nsid_pattern 651 - ))); 652 - } 653 - 654 - // Authority is first 2 segments (reversed domain) 655 - let authority = format!("{}.{}", parts[0], parts[1]); 656 - 657 - // Name segments are everything after the authority 658 - let name_segments = if parts.len() > 2 { 659 - parts[2..].join(".") 660 - } else { 661 - String::new() 662 - }; 663 - 664 - Ok((authority, name_segments)) 665 - } 666 - 667 - fn resolve_lexicon_did(authority: &str, name_segments: &str) -> Result<String, FetchError> { 668 - // Reverse the authority for DNS lookup and prepend name segments 669 - // For "app.bsky" + "actor": "_lexicon.actor.bsky.app" 670 - // For "place.stream" + "key": "_lexicon.key.stream.place" 671 - let auth_parts: Vec<&str> = authority.split('.').collect(); 672 - let reversed_auth: Vec<&str> = auth_parts.iter().rev().copied().collect(); 673 - 674 - let dns_name = if name_segments.is_empty() { 675 - // No name segments, just use reversed authority 676 - // For "place.stream": "_lexicon.stream.place" 677 - format!("_lexicon.{}", reversed_auth.join(".")) 678 - } else { 679 - // Prepend name segments before reversed authority 680 - // For "app.bsky" + "actor": "_lexicon.actor.bsky.app" 681 - format!("_lexicon.{}.{}", name_segments, reversed_auth.join(".")) 682 - }; 683 - 684 - println!(" Looking up DNS TXT record: {}", dns_name); 685 - 686 - // Create DNS resolver 687 - let resolver = Resolver::new(ResolverConfig::default(), ResolverOpts::default()) 688 - .map_err(|e| FetchError::DnsError(format!("Failed to create DNS resolver: {}", e)))?; 689 - 690 - // Lookup TXT records 691 - let response = resolver 692 - .txt_lookup(&dns_name) 693 - .map_err(|e| FetchError::DnsError(format!("DNS TXT lookup failed for {}: {}", dns_name, e)))?; 694 - 695 - // Parse TXT records to find DID 696 - for txt_record in response.iter() { 697 - for txt_data in txt_record.txt_data() { 698 - let text = String::from_utf8_lossy(txt_data); 699 - // Look for "did=did:plc:..." or "did=did:web:..." 700 - if let Some(did_value) = text.strip_prefix("did=") { 701 - return Ok(did_value.trim().to_string()); 702 - } 703 - } 704 - } 705 - 706 - Err(FetchError::DidParseError(format!( 707 - "No DID found in TXT record for {}", 708 - dns_name 709 - ))) 710 - } 711 - 712 - fn fetch_lexicon_records(did: &str) -> Result<Vec<AtProtoRecord>, FetchError> { 713 - // Query the ATProto repo for records in com.atproto.lexicon.schema collection 714 - // We need to use the repo.listRecords XRPC endpoint 715 - // Note: This API is paginated, so we need to fetch all pages 716 - 717 - // First, resolve the DID to a PDS endpoint 718 - let pds_url = resolve_did_to_pds(did)?; 719 - 720 - println!(" → Using PDS: {}", pds_url); 721 - 722 - let mut all_records = Vec::new(); 723 - let mut cursor: Option<String> = None; 724 - let mut page_num = 1; 725 - 726 - loop { 727 - // Build URL with optional cursor 728 - let url = if let Some(ref c) = cursor { 729 - format!( 730 - "{}/xrpc/com.atproto.repo.listRecords?repo={}&collection=com.atproto.lexicon.schema&cursor={}", 731 - pds_url, did, c 732 - ) 733 - } else { 734 - format!( 735 - "{}/xrpc/com.atproto.repo.listRecords?repo={}&collection=com.atproto.lexicon.schema", 736 - pds_url, did 737 - ) 738 - }; 739 - 740 - println!(" Fetching lexicon records (page {})...", page_num); 741 - 742 - let response = reqwest::blocking::get(&url) 743 - .map_err(|e| FetchError::HttpError(format!("Failed to fetch records: {}", e)))?; 744 - 745 - if !response.status().is_success() { 746 - return Err(FetchError::HttpError(format!( 747 - "HTTP {} when fetching records", 748 - response.status() 749 - ))); 750 - } 751 - 752 - let mut list_response: serde_json::Value = response 753 - .json() 754 - .map_err(|e| FetchError::HttpError(format!("Failed to parse response: {}", e)))?; 755 - 756 - // Extract records 757 - if let Some(records_array) = list_response.get_mut("records") { 758 - if let Some(records) = records_array.as_array_mut() { 759 - for record_value in records.drain(..) { 760 - let record: AtProtoRecord = serde_json::from_value(record_value) 761 - .map_err(|e| FetchError::HttpError(format!("Failed to parse record: {}", e)))?; 762 - all_records.push(record); 763 - } 764 - } 765 - } 766 - 767 - // Check for cursor to continue pagination 768 - cursor = list_response.get("cursor") 769 - .and_then(|c| c.as_str()) 770 - .map(|s| s.to_string()); 771 - 772 - if cursor.is_none() { 773 - break; 774 - } 775 - 776 - page_num += 1; 777 - } 778 - 779 - Ok(all_records) 780 - } 781 - 782 - fn resolve_did_to_pds(did: &str) -> Result<String, FetchError> { 783 - // For did:web:, extract the domain 784 - if let Some(domain) = did.strip_prefix("did:web:") { 785 - return Ok(format!("https://{}", domain)); 786 - } 787 - 788 - // For did:plc:, we need to query the PLC directory 789 - if did.starts_with("did:plc:") { 790 - // Query plc.directory to resolve DID document 791 - let url = format!("https://plc.directory/{}", did); 792 - 793 - let response = reqwest::blocking::get(&url) 794 - .map_err(|e| FetchError::HttpError(format!("Failed to resolve DID: {}", e)))?; 795 - 796 - if !response.status().is_success() { 797 - return Err(FetchError::HttpError(format!( 798 - "Failed to resolve DID {}: HTTP {}", 799 - did, 800 - response.status() 801 - ))); 802 - } 803 - 804 - let did_doc: serde_json::Value = response 805 - .json() 806 - .map_err(|e| FetchError::HttpError(format!("Failed to parse DID document: {}", e)))?; 807 - 808 - // Extract PDS endpoint from service array 809 - if let Some(services) = did_doc.get("service").and_then(|v| v.as_array()) { 810 - for service in services { 811 - if service.get("type").and_then(|v| v.as_str()) == Some("AtprotoPersonalDataServer") { 812 - if let Some(endpoint) = service.get("serviceEndpoint").and_then(|v| v.as_str()) { 813 - return Ok(endpoint.trim_end_matches('/').to_string()); 814 - } 815 - } 816 - } 817 - } 818 - 819 - return Err(FetchError::HttpError(format!( 820 - "No PDS endpoint found in DID document for {}", 821 - did 822 - ))); 823 - } 824 - 825 - Err(FetchError::HttpError(format!( 826 - "Unsupported DID method: {}", 827 - did 828 - ))) 829 - } 830 - 831 - fn extract_nsid_from_record(record: &AtProtoRecord) -> Result<String, FetchError> { 832 - // The record value should have an "id" field with the NSID 833 - if let Some(id) = record.value.get("id").and_then(|v| v.as_str()) { 834 - return Ok(id.to_string()); 835 - } 836 - 837 - // Fallback: try to extract from URI 838 - // URI format: at://did:plc:xxx/com.atproto.lexicon.schema/nsid 839 - if let Some(rkey) = record.uri.split('/').last() { 840 - return Ok(rkey.to_string()); 841 - } 842 - 843 - Err(FetchError::HttpError(format!( 844 - "Could not extract NSID from record: {}", 845 - record.uri 846 - ))) 847 - } 848 583 849 584 /// Calculate SHA-256 hash of content 850 585 fn calculate_hash(content: &str) -> String { ··· 994 729 } 995 730 } 996 731 997 - /// Optimize a set of NSIDs by collapsing them into the minimal set of fetch patterns 998 - /// For example: ["app.bsky.actor.foo", "app.bsky.actor.bar"] -> ["app.bsky.actor.*"] 999 - /// This function tries multiple grouping strategies to find the most efficient pattern 1000 - fn optimize_fetch_patterns(nsids: &HashSet<String>) -> Vec<String> { 1001 - use std::collections::BTreeMap; 1002 - 1003 - if nsids.is_empty() { 1004 - return Vec::new(); 1005 - } 1006 - 1007 - // Strategy 1: Try grouping by authority (first 2 segments) 1008 - // e.g., ["app.bsky.actor.foo", "app.bsky.feed.bar"] -> ["app.bsky.*"] 1009 - let mut authority_groups: BTreeMap<String, Vec<String>> = BTreeMap::new(); 1010 - 1011 - for nsid in nsids { 1012 - let parts: Vec<&str> = nsid.split('.').collect(); 1013 - if parts.len() >= 2 { 1014 - let authority = format!("{}.{}", parts[0], parts[1]); 1015 - authority_groups.entry(authority).or_insert_with(Vec::new).push(nsid.clone()); 1016 - } 1017 - } 1018 - 1019 - // Strategy 2: Try grouping by namespace prefix (all but last segment) 1020 - // e.g., ["app.bsky.actor.foo", "app.bsky.actor.bar"] -> ["app.bsky.actor.*"] 1021 - let mut prefix_groups: BTreeMap<String, Vec<String>> = BTreeMap::new(); 1022 - 1023 - for nsid in nsids { 1024 - let parts: Vec<&str> = nsid.split('.').collect(); 1025 - if parts.len() >= 3 { 1026 - let prefix = parts[..parts.len() - 1].join("."); 1027 - prefix_groups.entry(prefix).or_insert_with(Vec::new).push(nsid.clone()); 1028 - } 1029 - } 1030 - 1031 - let mut result = Vec::new(); 1032 - let mut handled_nsids = HashSet::new(); 1033 - 1034 - // First pass: Apply namespace-level grouping (more specific) 1035 - for (prefix, group) in &prefix_groups { 1036 - if group.len() >= 2 && !handled_nsids.contains(&group[0]) { 1037 - result.push(format!("{}.*", prefix)); 1038 - for nsid in group { 1039 - handled_nsids.insert(nsid.clone()); 1040 - } 1041 - } 1042 - } 1043 - 1044 - // Second pass: For remaining NSIDs, consider authority-level grouping 1045 - // Only use authority wildcard if we have 3+ different namespaces under same authority 1046 - for (authority, group) in &authority_groups { 1047 - let unhandled: Vec<&String> = group.iter() 1048 - .filter(|nsid| !handled_nsids.contains(*nsid)) 1049 - .collect(); 1050 - 1051 - if unhandled.len() >= 3 { 1052 - result.push(format!("{}.*", authority)); 1053 - for nsid in &unhandled { 1054 - handled_nsids.insert((*nsid).clone()); 1055 - } 1056 - } 1057 - } 1058 - 1059 - // Third pass: Add remaining individual NSIDs 1060 - for nsid in nsids { 1061 - if !handled_nsids.contains(nsid) { 1062 - result.push(nsid.clone()); 1063 - } 1064 - } 1065 - 1066 - // Sort for consistent output 1067 - result.sort(); 1068 - result 1069 - }
+3 -2
mlf-cli/src/main.rs
··· 112 112 }, 113 113 } 114 114 115 - fn main() { 115 + #[tokio::main] 116 + async fn main() { 116 117 let cli = Cli::parse(); 117 118 118 119 let result: Result<(), miette::Report> = match cli.command { ··· 141 142 } 142 143 }, 143 144 Commands::Fetch { nsid, save, update, locked } => { 144 - fetch::run_fetch(nsid, save, update, locked).into_diagnostic() 145 + fetch::run_fetch(nsid, save, update, locked).await.into_diagnostic() 145 146 } 146 147 }; 147 148
+4
mlf-lang/tests/integration_test.rs
··· 11 11 #[serde(default)] 12 12 errors: Vec<ExpectedError>, 13 13 #[serde(default)] 14 + #[allow(dead_code)] 14 15 warnings: Vec<String>, 15 16 #[serde(flatten)] 17 + #[allow(dead_code)] 16 18 extra: HashMap<String, serde_json::Value>, 17 19 } 18 20 ··· 21 23 #[serde(rename = "type")] 22 24 error_type: String, 23 25 #[serde(default)] 26 + #[allow(dead_code)] 24 27 name: Option<String>, 25 28 #[serde(default)] 29 + #[allow(dead_code)] 26 30 message: Option<String>, 27 31 } 28 32
+17
mlf-lexicon-fetcher/Cargo.toml
··· 1 + [package] 2 + name = "mlf-lexicon-fetcher" 3 + version = "0.1.0" 4 + edition = "2024" 5 + license = "MIT" 6 + 7 + [dependencies] 8 + hickory-resolver = "0.24" 9 + thiserror = "2.0" 10 + serde = { version = "1.0", features = ["derive"] } 11 + serde_json = "1.0" 12 + reqwest = { version = "0.12", features = ["json"] } 13 + async-trait = "0.1" 14 + tokio = { version = "1", features = ["rt"] } 15 + 16 + [dev-dependencies] 17 + tokio = { version = "1", features = ["full"] }
+72
mlf-lexicon-fetcher/examples/usage.rs
··· 1 + // Example usage of mlf-lexicon-fetcher 2 + 3 + use mlf_lexicon_fetcher::{LexiconFetcher, MockDnsResolver, MockHttpClient}; 4 + use serde_json::json; 5 + 6 + #[tokio::main] 7 + async fn main() -> Result<(), Box<dyn std::error::Error>> { 8 + // Example 1: Fetch a single lexicon 9 + println!("=== Example 1: Fetch Single Lexicon ==="); 10 + 11 + let mut dns_resolver = MockDnsResolver::new(); 12 + dns_resolver.add_record("place.stream", "chat.profile", "did:plc:test123".to_string()); 13 + 14 + let mut http_client = MockHttpClient::new(); 15 + http_client.add_lexicon( 16 + "place.stream.chat.profile".to_string(), 17 + json!({ 18 + "lexicon": 1, 19 + "id": "place.stream.chat.profile", 20 + "defs": { 21 + "main": { 22 + "type": "record", 23 + "description": "A user profile for chat" 24 + } 25 + } 26 + }), 27 + ); 28 + 29 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 30 + 31 + match fetcher.fetch("place.stream.chat.profile").await { 32 + Ok(lexicon) => { 33 + println!("Successfully fetched lexicon:"); 34 + println!("{}", serde_json::to_string_pretty(&lexicon)?); 35 + } 36 + Err(e) => eprintln!("Error: {}", e), 37 + } 38 + 39 + // Example 2: Fetch multiple lexicons with a pattern 40 + println!("\n=== Example 2: Fetch Multiple Lexicons with Pattern ==="); 41 + 42 + let mut dns_resolver2 = MockDnsResolver::new(); 43 + dns_resolver2.add_record("app.bsky", "feed", "did:plc:bsky123".to_string()); 44 + 45 + let mut http_client2 = MockHttpClient::new(); 46 + http_client2.add_lexicon( 47 + "app.bsky.feed.post".to_string(), 48 + json!({"lexicon": 1, "id": "app.bsky.feed.post"}), 49 + ); 50 + http_client2.add_lexicon( 51 + "app.bsky.feed.like".to_string(), 52 + json!({"lexicon": 1, "id": "app.bsky.feed.like"}), 53 + ); 54 + http_client2.add_lexicon( 55 + "app.bsky.feed.repost".to_string(), 56 + json!({"lexicon": 1, "id": "app.bsky.feed.repost"}), 57 + ); 58 + 59 + let fetcher2 = LexiconFetcher::new(dns_resolver2, http_client2); 60 + 61 + match fetcher2.fetch_pattern("app.bsky.feed.*").await { 62 + Ok(lexicons) => { 63 + println!("Successfully fetched {} lexicons:", lexicons.len()); 64 + for (nsid, _lexicon) in lexicons { 65 + println!(" - {}", nsid); 66 + } 67 + } 68 + Err(e) => eprintln!("Error: {}", e), 69 + } 70 + 71 + Ok(()) 72 + }
+880
mlf-lexicon-fetcher/src/lib.rs
··· 1 + // MLF Lexicon Fetcher 2 + // Resolves ATProto lexicon NSIDs to DIDs via DNS TXT records 3 + // and fetches lexicon JSON via HTTP 4 + 5 + use async_trait::async_trait; 6 + use hickory_resolver::config::{ResolverConfig, ResolverOpts}; 7 + use hickory_resolver::TokioAsyncResolver; 8 + use serde::Deserialize; 9 + use std::collections::HashMap; 10 + use std::sync::{Arc, Mutex}; 11 + use thiserror::Error; 12 + 13 + #[derive(Debug, Deserialize)] 14 + struct AtProtoRecord { 15 + uri: String, 16 + value: serde_json::Value, 17 + } 18 + 19 + #[derive(Error, Debug)] 20 + pub enum LexiconFetcherError { 21 + #[error("Failed to create DNS resolver: {0}")] 22 + ResolverCreationFailed(String), 23 + 24 + #[error("DNS lookup failed for {domain}: {error}")] 25 + LookupFailed { domain: String, error: String }, 26 + 27 + #[error("No DID found in TXT record for {0}")] 28 + NoDid(String), 29 + 30 + #[error("Invalid NSID format: {0}")] 31 + InvalidNsid(String), 32 + 33 + #[error("HTTP request failed: {0}")] 34 + HttpRequestFailed(String), 35 + 36 + #[error("Failed to parse JSON response: {0}")] 37 + JsonParseFailed(String), 38 + 39 + #[error("Lexicon not found: {0}")] 40 + LexiconNotFound(String), 41 + 42 + #[error("Invalid URL: {0}")] 43 + InvalidUrl(String), 44 + } 45 + 46 + pub type Result<T> = std::result::Result<T, LexiconFetcherError>; 47 + 48 + /// Trait for DNS resolution - allows mocking in tests 49 + #[async_trait] 50 + pub trait DnsResolver: Send + Sync { 51 + /// Resolve an NSID to a DID via DNS TXT lookup 52 + async fn resolve_lexicon_did(&self, authority: &str, name_segments: &str) -> Result<String>; 53 + } 54 + 55 + /// Real DNS resolver using hickory_resolver's async resolver 56 + pub struct RealDnsResolver { 57 + resolver: TokioAsyncResolver, 58 + } 59 + 60 + impl RealDnsResolver { 61 + pub async fn new() -> Result<Self> { 62 + let resolver = TokioAsyncResolver::tokio(ResolverConfig::default(), ResolverOpts::default()); 63 + Ok(Self { resolver }) 64 + } 65 + 66 + pub async fn with_config(config: ResolverConfig, opts: ResolverOpts) -> Result<Self> { 67 + let resolver = TokioAsyncResolver::tokio(config, opts); 68 + Ok(Self { resolver }) 69 + } 70 + } 71 + 72 + #[async_trait] 73 + impl DnsResolver for RealDnsResolver { 74 + async fn resolve_lexicon_did(&self, authority: &str, name_segments: &str) -> Result<String> { 75 + let dns_name = construct_dns_name(authority, name_segments); 76 + 77 + // Lookup TXT records (async) 78 + let response = self 79 + .resolver 80 + .txt_lookup(&dns_name) 81 + .await 82 + .map_err(|e| LexiconFetcherError::LookupFailed { 83 + domain: dns_name.clone(), 84 + error: e.to_string(), 85 + })?; 86 + 87 + // Parse TXT records to find DID 88 + for txt_record in response.iter() { 89 + for txt_data in txt_record.txt_data() { 90 + let text = String::from_utf8_lossy(txt_data); 91 + // Look for "did=did:plc:..." or "did=did:web:..." 92 + if let Some(did_value) = text.strip_prefix("did=") { 93 + return Ok(did_value.trim().to_string()); 94 + } 95 + } 96 + } 97 + 98 + Err(LexiconFetcherError::NoDid(dns_name)) 99 + } 100 + } 101 + 102 + /// Mock DNS resolver for testing 103 + #[derive(Clone)] 104 + pub struct MockDnsResolver { 105 + records: Arc<Mutex<HashMap<String, String>>>, 106 + } 107 + 108 + impl MockDnsResolver { 109 + pub fn new() -> Self { 110 + Self { 111 + records: Arc::new(Mutex::new(HashMap::new())), 112 + } 113 + } 114 + 115 + /// Add a mock DNS record (maps NSID authority+name to DID) 116 + pub fn add_record(&mut self, authority: &str, name_segments: &str, did: String) { 117 + let dns_name = construct_dns_name(authority, name_segments); 118 + self.records.lock().unwrap().insert(dns_name, did); 119 + } 120 + 121 + /// Add a mock record using full NSID 122 + pub fn add_record_from_nsid(&mut self, nsid: &str, did: String) -> Result<()> { 123 + let (authority, name_segments) = parse_nsid(nsid)?; 124 + self.add_record(&authority, &name_segments, did); 125 + Ok(()) 126 + } 127 + } 128 + 129 + impl Default for MockDnsResolver { 130 + fn default() -> Self { 131 + Self::new() 132 + } 133 + } 134 + 135 + #[async_trait] 136 + impl DnsResolver for MockDnsResolver { 137 + async fn resolve_lexicon_did(&self, authority: &str, name_segments: &str) -> Result<String> { 138 + let dns_name = construct_dns_name(authority, name_segments); 139 + self.records 140 + .lock() 141 + .unwrap() 142 + .get(&dns_name) 143 + .cloned() 144 + .ok_or_else(|| LexiconFetcherError::LookupFailed { 145 + domain: dns_name.clone(), 146 + error: "No mock record found".to_string(), 147 + }) 148 + } 149 + } 150 + 151 + /// Construct DNS name from authority and name segments 152 + /// For "app.bsky" + "actor": "_lexicon.actor.bsky.app" 153 + /// For "place.stream" + "key": "_lexicon.key.stream.place" 154 + pub fn construct_dns_name(authority: &str, name_segments: &str) -> String { 155 + let auth_parts: Vec<&str> = authority.split('.').collect(); 156 + let reversed_auth: Vec<&str> = auth_parts.iter().rev().copied().collect(); 157 + 158 + if name_segments.is_empty() { 159 + // No name segments, just use reversed authority 160 + // For "place.stream": "_lexicon.stream.place" 161 + format!("_lexicon.{}", reversed_auth.join(".")) 162 + } else { 163 + // Prepend name segments before reversed authority 164 + // For "app.bsky" + "actor": "_lexicon.actor.bsky.app" 165 + format!("_lexicon.{}.{}", name_segments, reversed_auth.join(".")) 166 + } 167 + } 168 + 169 + /// Parse NSID into authority and name segments 170 + /// For "place.stream.key", returns ("place.stream", "key") 171 + /// For "app.bsky.actor.profile", returns ("app.bsky", "actor.profile") 172 + pub fn parse_nsid(nsid: &str) -> Result<(String, String)> { 173 + // NSID format: authority.name(.name)* 174 + // Authority is first 2 segments (reversed domain) 175 + let parts: Vec<&str> = nsid.split('.').collect(); 176 + 177 + if parts.len() < 2 { 178 + return Err(LexiconFetcherError::InvalidNsid(format!( 179 + "NSID must have at least 2 segments: {}", 180 + nsid 181 + ))); 182 + } 183 + 184 + // Authority is first 2 segments 185 + let authority = format!("{}.{}", parts[0], parts[1]); 186 + 187 + // Name segments are everything after the authority 188 + let name_segments = if parts.len() > 2 { 189 + parts[2..].join(".") 190 + } else { 191 + String::new() 192 + }; 193 + 194 + Ok((authority, name_segments)) 195 + } 196 + 197 + /// Trait for HTTP client - allows mocking in tests 198 + #[async_trait] 199 + pub trait HttpClient: Send + Sync { 200 + /// Fetch a single lexicon by NSID from a DID's server 201 + async fn fetch_lexicon(&self, did: &str, nsid: &str) -> Result<serde_json::Value>; 202 + 203 + /// Fetch all lexicons matching a pattern (e.g., "place.stream.*") 204 + async fn fetch_lexicons_pattern( 205 + &self, 206 + did: &str, 207 + pattern: &str, 208 + ) -> Result<Vec<(String, serde_json::Value)>>; 209 + } 210 + 211 + /// Real HTTP client using reqwest 212 + pub struct RealHttpClient { 213 + client: reqwest::Client, 214 + } 215 + 216 + impl RealHttpClient { 217 + pub fn new() -> Self { 218 + Self { 219 + client: reqwest::Client::new(), 220 + } 221 + } 222 + 223 + pub fn with_client(client: reqwest::Client) -> Self { 224 + Self { client } 225 + } 226 + } 227 + 228 + impl Default for RealHttpClient { 229 + fn default() -> Self { 230 + Self::new() 231 + } 232 + } 233 + 234 + #[async_trait] 235 + impl HttpClient for RealHttpClient { 236 + async fn fetch_lexicon(&self, did: &str, nsid: &str) -> Result<serde_json::Value> { 237 + // Fetch all records from the DID's repo 238 + let records = self.fetch_records_from_did(did).await?; 239 + 240 + // Find the specific NSID 241 + for record in records { 242 + let record_nsid = extract_nsid_from_record(&record)?; 243 + if record_nsid == nsid { 244 + return Ok(record.value); 245 + } 246 + } 247 + 248 + Err(LexiconFetcherError::LexiconNotFound(format!( 249 + "Lexicon {} not found in repo {}", 250 + nsid, did 251 + ))) 252 + } 253 + 254 + async fn fetch_lexicons_pattern( 255 + &self, 256 + did: &str, 257 + pattern: &str, 258 + ) -> Result<Vec<(String, serde_json::Value)>> { 259 + // Fetch all records from the DID's repo 260 + let records = self.fetch_records_from_did(did).await?; 261 + 262 + let mut results = Vec::new(); 263 + 264 + // Handle exact match (no wildcard) 265 + if !pattern.contains('*') && !pattern.contains('_') { 266 + for record in records { 267 + let record_nsid = extract_nsid_from_record(&record)?; 268 + if record_nsid == pattern { 269 + results.push((record_nsid, record.value)); 270 + } 271 + } 272 + return Ok(results); 273 + } 274 + 275 + // Handle wildcard patterns 276 + if pattern.ends_with(".*") { 277 + // "*" matches EVERYTHING 278 + // For "place.stream.*", match all: place.stream.chat, place.stream.chat.profile, etc. 279 + let base = pattern.strip_suffix(".*").unwrap(); 280 + let prefix_with_dot = format!("{}.", base); 281 + 282 + for record in records { 283 + let record_nsid = extract_nsid_from_record(&record)?; 284 + if record_nsid.starts_with(&prefix_with_dot) { 285 + results.push((record_nsid, record.value)); 286 + } 287 + } 288 + } else if pattern.ends_with("._") { 289 + // "_" matches only DIRECT CHILDREN 290 + // For "place.stream._", match place.stream.chat but NOT place.stream.chat.profile 291 + let base = pattern.strip_suffix("._").unwrap(); 292 + let prefix_with_dot = format!("{}.", base); 293 + 294 + for record in records { 295 + let record_nsid = extract_nsid_from_record(&record)?; 296 + 297 + if let Some(suffix) = record_nsid.strip_prefix(&prefix_with_dot) { 298 + // Check if it's a direct child (no more dots in the suffix) 299 + if !suffix.contains('.') && !suffix.is_empty() { 300 + results.push((record_nsid, record.value)); 301 + } 302 + } 303 + } 304 + } 305 + 306 + Ok(results) 307 + } 308 + } 309 + 310 + impl RealHttpClient { 311 + /// Fetch all lexicon records from a DID's ATProto repository 312 + async fn fetch_records_from_did(&self, did: &str) -> Result<Vec<AtProtoRecord>> { 313 + // Resolve DID to PDS URL 314 + let pds_url = self.resolve_did_to_pds(did).await?; 315 + 316 + let mut all_records = Vec::new(); 317 + let mut cursor: Option<String> = None; 318 + 319 + // Paginate through all records 320 + loop { 321 + let url = if let Some(ref c) = cursor { 322 + format!( 323 + "{}/xrpc/com.atproto.repo.listRecords?repo={}&collection=com.atproto.lexicon.schema&cursor={}", 324 + pds_url, did, c 325 + ) 326 + } else { 327 + format!( 328 + "{}/xrpc/com.atproto.repo.listRecords?repo={}&collection=com.atproto.lexicon.schema", 329 + pds_url, did 330 + ) 331 + }; 332 + 333 + let response = self 334 + .client 335 + .get(&url) 336 + .send() 337 + .await 338 + .map_err(|e| LexiconFetcherError::HttpRequestFailed(e.to_string()))?; 339 + 340 + if !response.status().is_success() { 341 + return Err(LexiconFetcherError::HttpRequestFailed(format!( 342 + "HTTP {} when fetching records from {}", 343 + response.status(), 344 + did 345 + ))); 346 + } 347 + 348 + let mut list_response: serde_json::Value = response 349 + .json() 350 + .await 351 + .map_err(|e| LexiconFetcherError::JsonParseFailed(e.to_string()))?; 352 + 353 + // Extract records 354 + if let Some(records_array) = list_response.get_mut("records") { 355 + if let Some(records) = records_array.as_array_mut() { 356 + for record_value in records.drain(..) { 357 + let record: AtProtoRecord = serde_json::from_value(record_value) 358 + .map_err(|e| LexiconFetcherError::JsonParseFailed(format!("Failed to parse record: {}", e)))?; 359 + all_records.push(record); 360 + } 361 + } 362 + } 363 + 364 + // Check for pagination cursor 365 + cursor = list_response 366 + .get("cursor") 367 + .and_then(|c| c.as_str()) 368 + .map(|s| s.to_string()); 369 + 370 + if cursor.is_none() { 371 + break; 372 + } 373 + } 374 + 375 + Ok(all_records) 376 + } 377 + 378 + /// Resolve a DID to its PDS URL 379 + async fn resolve_did_to_pds(&self, did: &str) -> Result<String> { 380 + // For did:web:, extract the domain 381 + if let Some(domain) = did.strip_prefix("did:web:") { 382 + return Ok(format!("https://{}", domain)); 383 + } 384 + 385 + // For did:plc:, query the PLC directory 386 + if did.starts_with("did:plc:") { 387 + let url = format!("https://plc.directory/{}", did); 388 + 389 + let response = self 390 + .client 391 + .get(&url) 392 + .send() 393 + .await 394 + .map_err(|e| LexiconFetcherError::HttpRequestFailed(format!("Failed to resolve DID: {}", e)))?; 395 + 396 + if !response.status().is_success() { 397 + return Err(LexiconFetcherError::HttpRequestFailed(format!( 398 + "Failed to resolve DID {}: HTTP {}", 399 + did, 400 + response.status() 401 + ))); 402 + } 403 + 404 + let did_doc: serde_json::Value = response 405 + .json() 406 + .await 407 + .map_err(|e| LexiconFetcherError::JsonParseFailed(format!("Failed to parse DID document: {}", e)))?; 408 + 409 + // Extract PDS endpoint from service array 410 + if let Some(services) = did_doc.get("service").and_then(|v| v.as_array()) { 411 + for service in services { 412 + if service.get("type").and_then(|v| v.as_str()) == Some("AtprotoPersonalDataServer") { 413 + if let Some(endpoint) = service.get("serviceEndpoint").and_then(|v| v.as_str()) { 414 + return Ok(endpoint.trim_end_matches('/').to_string()); 415 + } 416 + } 417 + } 418 + } 419 + 420 + return Err(LexiconFetcherError::HttpRequestFailed(format!( 421 + "No PDS endpoint found in DID document for {}", 422 + did 423 + ))); 424 + } 425 + 426 + Err(LexiconFetcherError::InvalidUrl(format!( 427 + "Unsupported DID format: {}", 428 + did 429 + ))) 430 + } 431 + } 432 + 433 + /// Mock HTTP client for testing 434 + #[derive(Clone)] 435 + pub struct MockHttpClient { 436 + lexicons: Arc<Mutex<HashMap<String, serde_json::Value>>>, 437 + } 438 + 439 + impl MockHttpClient { 440 + pub fn new() -> Self { 441 + Self { 442 + lexicons: Arc::new(Mutex::new(HashMap::new())), 443 + } 444 + } 445 + 446 + /// Add a mock lexicon response for a specific NSID 447 + pub fn add_lexicon(&mut self, nsid: String, lexicon: serde_json::Value) { 448 + self.lexicons.lock().unwrap().insert(nsid, lexicon); 449 + } 450 + } 451 + 452 + impl Default for MockHttpClient { 453 + fn default() -> Self { 454 + Self::new() 455 + } 456 + } 457 + 458 + #[async_trait] 459 + impl HttpClient for MockHttpClient { 460 + async fn fetch_lexicon(&self, _did: &str, nsid: &str) -> Result<serde_json::Value> { 461 + self.lexicons 462 + .lock() 463 + .unwrap() 464 + .get(nsid) 465 + .cloned() 466 + .ok_or_else(|| LexiconFetcherError::LexiconNotFound(nsid.to_string())) 467 + } 468 + 469 + async fn fetch_lexicons_pattern( 470 + &self, 471 + _did: &str, 472 + pattern: &str, 473 + ) -> Result<Vec<(String, serde_json::Value)>> { 474 + let lexicons = self.lexicons.lock().unwrap(); 475 + let mut results = Vec::new(); 476 + 477 + // Handle exact match (no wildcard) 478 + if !pattern.contains('*') && !pattern.contains('_') { 479 + if let Some(lexicon) = lexicons.get(pattern) { 480 + results.push((pattern.to_string(), lexicon.clone())); 481 + } 482 + return Ok(results); 483 + } 484 + 485 + // Handle wildcard patterns 486 + if pattern.ends_with(".*") { 487 + // "*" matches EVERYTHING 488 + // For "place.stream.*", match all: place.stream.chat, place.stream.chat.profile, etc. 489 + let base = pattern.strip_suffix(".*").unwrap(); 490 + let prefix_with_dot = format!("{}.", base); 491 + 492 + for (nsid, lexicon) in lexicons.iter() { 493 + if nsid.starts_with(&prefix_with_dot) { 494 + results.push((nsid.clone(), lexicon.clone())); 495 + } 496 + } 497 + } else if pattern.ends_with("._") { 498 + // "_" matches only DIRECT CHILDREN 499 + // For "place.stream._", match place.stream.chat but NOT place.stream.chat.profile 500 + let base = pattern.strip_suffix("._").unwrap(); 501 + let prefix_with_dot = format!("{}.", base); 502 + 503 + for (nsid, lexicon) in lexicons.iter() { 504 + if let Some(suffix) = nsid.strip_prefix(&prefix_with_dot) { 505 + // Check if it's a direct child (no more dots in the suffix) 506 + if !suffix.contains('.') && !suffix.is_empty() { 507 + results.push((nsid.clone(), lexicon.clone())); 508 + } 509 + } 510 + } 511 + } 512 + 513 + Ok(results) 514 + } 515 + } 516 + 517 + /// Main lexicon fetcher that combines DNS resolution and HTTP fetching 518 + pub struct LexiconFetcher<D: DnsResolver, H: HttpClient> { 519 + dns_resolver: D, 520 + http_client: H, 521 + } 522 + 523 + impl<D: DnsResolver, H: HttpClient> LexiconFetcher<D, H> { 524 + pub fn new(dns_resolver: D, http_client: H) -> Self { 525 + Self { 526 + dns_resolver, 527 + http_client, 528 + } 529 + } 530 + 531 + /// Fetch a single lexicon by NSID 532 + /// Example: "place.stream.chat.profile" -> single lexicon JSON 533 + pub async fn fetch(&self, nsid: &str) -> Result<serde_json::Value> { 534 + // Check if this is a wildcard pattern 535 + if nsid.contains('*') || nsid.contains('_') { 536 + return Err(LexiconFetcherError::InvalidNsid(format!( 537 + "Use fetch_pattern() for wildcard patterns (* or _): {}", 538 + nsid 539 + ))); 540 + } 541 + 542 + // Parse NSID into authority and name segments 543 + let (authority, name_segments) = parse_nsid(nsid)?; 544 + 545 + // Resolve DID via DNS (async) 546 + let did = self.dns_resolver.resolve_lexicon_did(&authority, &name_segments).await?; 547 + 548 + // Fetch lexicon via HTTP 549 + self.http_client.fetch_lexicon(&did, nsid).await 550 + } 551 + 552 + /// Fetch all lexicons matching a pattern 553 + /// Examples: 554 + /// - "place.stream.*" -> matches everything (place.stream.chat, place.stream.chat.profile, etc.) 555 + /// - "place.stream._" -> matches direct children only (place.stream.chat, place.stream.key, but not place.stream.chat.profile) 556 + pub async fn fetch_pattern(&self, pattern: &str) -> Result<Vec<(String, serde_json::Value)>> { 557 + // Parse pattern to extract authority 558 + let (authority, name_pattern) = parse_nsid(pattern)?; 559 + 560 + // For DNS lookup, remove wildcard suffix (both .* and ._) 561 + // For "place.stream.*" or "place.stream._", name_pattern is "*" or "_", so we use empty string 562 + // For "place.stream.chat.*", name_pattern is "chat.*", so we use "chat" 563 + let dns_name_segments = if name_pattern == "*" || name_pattern == "_" { 564 + "" 565 + } else if let Some(pos) = name_pattern.rfind(".*") { 566 + &name_pattern[..pos] 567 + } else if let Some(pos) = name_pattern.rfind("._") { 568 + &name_pattern[..pos] 569 + } else { 570 + &name_pattern 571 + }; 572 + 573 + // Resolve DID via DNS (async) 574 + let did = self.dns_resolver.resolve_lexicon_did(&authority, dns_name_segments).await?; 575 + 576 + // Fetch lexicons matching pattern via HTTP 577 + self.http_client.fetch_lexicons_pattern(&did, pattern).await 578 + } 579 + } 580 + 581 + /// Metadata about a fetched lexicon 582 + #[derive(Debug, Clone)] 583 + pub struct FetchedLexicon { 584 + pub nsid: String, 585 + pub lexicon: serde_json::Value, 586 + pub did: String, 587 + } 588 + 589 + /// Result of fetching one or more lexicons 590 + #[derive(Debug)] 591 + pub struct FetchResult { 592 + pub lexicons: Vec<FetchedLexicon>, 593 + } 594 + 595 + /// Convenience type for production use with real DNS and HTTP 596 + pub type ProductionLexiconFetcher = LexiconFetcher<RealDnsResolver, RealHttpClient>; 597 + 598 + impl ProductionLexiconFetcher { 599 + /// Create a new production fetcher with default configuration 600 + pub async fn production() -> Result<Self> { 601 + Ok(Self::new(RealDnsResolver::new().await?, RealHttpClient::new())) 602 + } 603 + } 604 + 605 + impl<D: DnsResolver, H: HttpClient> LexiconFetcher<D, H> { 606 + /// Fetch one or more lexicons and return metadata for lockfile tracking 607 + /// Handles both exact NSIDs and patterns (* or _) 608 + pub async fn fetch_with_metadata(&self, nsid: &str) -> Result<FetchResult> { 609 + // Parse pattern to extract authority and name segments 610 + let (authority, name_segments) = parse_nsid(nsid)?; 611 + 612 + // For DNS lookup, remove wildcard suffix (both .* and ._) 613 + let dns_name_segments = if name_segments == "*" || name_segments == "_" { 614 + "" 615 + } else if let Some(pos) = name_segments.rfind(".*") { 616 + &name_segments[..pos] 617 + } else if let Some(pos) = name_segments.rfind("._") { 618 + &name_segments[..pos] 619 + } else { 620 + &name_segments 621 + }; 622 + 623 + // Resolve DID via DNS (async) 624 + let did = self.dns_resolver.resolve_lexicon_did(&authority, dns_name_segments).await?; 625 + 626 + // Fetch lexicons 627 + let lexicons = if nsid.contains('*') || nsid.contains('_') { 628 + self.http_client.fetch_lexicons_pattern(&did, nsid).await? 629 + } else { 630 + let lexicon = self.http_client.fetch_lexicon(&did, nsid).await?; 631 + vec![(nsid.to_string(), lexicon)] 632 + }; 633 + 634 + // Package results with metadata 635 + let fetched_lexicons = lexicons 636 + .into_iter() 637 + .map(|(nsid, lexicon)| FetchedLexicon { 638 + nsid, 639 + lexicon, 640 + did: did.clone(), 641 + }) 642 + .collect(); 643 + 644 + Ok(FetchResult { 645 + lexicons: fetched_lexicons, 646 + }) 647 + } 648 + 649 + /// Fetch multiple NSIDs in sequence (no optimization) 650 + pub async fn fetch_many(&self, nsids: &[String]) -> Result<Vec<FetchResult>> { 651 + let mut results = Vec::new(); 652 + for nsid in nsids { 653 + results.push(self.fetch_with_metadata(nsid).await?); 654 + } 655 + Ok(results) 656 + } 657 + 658 + /// Fetch multiple NSIDs with optimization to reduce network requests 659 + /// Groups similar NSIDs into wildcard patterns when beneficial 660 + /// Example: ["app.bsky.actor.foo", "app.bsky.actor.bar"] -> fetches "app.bsky.actor.*" 661 + pub async fn fetch_many_optimized(&self, nsids: &[String]) -> Result<Vec<FetchResult>> { 662 + use std::collections::HashSet; 663 + 664 + if nsids.is_empty() { 665 + return Ok(Vec::new()); 666 + } 667 + 668 + // Convert to HashSet for optimization 669 + let nsids_set: HashSet<String> = nsids.iter().cloned().collect(); 670 + 671 + // Optimize into minimal set of patterns 672 + let optimized_patterns = optimize_fetch_patterns(&nsids_set); 673 + 674 + // Fetch each optimized pattern 675 + let mut results = Vec::new(); 676 + for pattern in optimized_patterns { 677 + results.push(self.fetch_with_metadata(&pattern).await?); 678 + } 679 + 680 + Ok(results) 681 + } 682 + 683 + /// Fetch lexicon(s) from a known DID, bypassing DNS resolution 684 + /// Useful when fetching from lockfile where DID is already known 685 + /// Handles both exact NSIDs and patterns (* or _) 686 + pub async fn fetch_from_did_with_metadata(&self, did: &str, nsid: &str) -> Result<FetchResult> { 687 + // Fetch lexicons directly from the DID 688 + let lexicons = if nsid.contains('*') || nsid.contains('_') { 689 + self.http_client.fetch_lexicons_pattern(did, nsid).await? 690 + } else { 691 + let lexicon = self.http_client.fetch_lexicon(did, nsid).await?; 692 + vec![(nsid.to_string(), lexicon)] 693 + }; 694 + 695 + // Package results with metadata 696 + let fetched_lexicons = lexicons 697 + .into_iter() 698 + .map(|(nsid, lexicon)| FetchedLexicon { 699 + nsid, 700 + lexicon, 701 + did: did.to_string(), 702 + }) 703 + .collect(); 704 + 705 + Ok(FetchResult { 706 + lexicons: fetched_lexicons, 707 + }) 708 + } 709 + } 710 + 711 + /// Convenience type for testing with mocks 712 + pub type MockLexiconFetcher = LexiconFetcher<MockDnsResolver, MockHttpClient>; 713 + 714 + impl MockLexiconFetcher { 715 + /// Create a new mock fetcher for testing 716 + pub fn mock() -> Self { 717 + Self::new(MockDnsResolver::new(), MockHttpClient::new()) 718 + } 719 + } 720 + 721 + /// Extract NSID from an ATProto record 722 + fn extract_nsid_from_record(record: &AtProtoRecord) -> Result<String> { 723 + // The record value should have an "id" field with the NSID 724 + if let Some(id) = record.value.get("id").and_then(|v| v.as_str()) { 725 + return Ok(id.to_string()); 726 + } 727 + 728 + // Fallback: try to extract from URI 729 + // URI format: at://did:plc:xxx/com.atproto.lexicon.schema/nsid 730 + if let Some(rkey) = record.uri.split('/').last() { 731 + return Ok(rkey.to_string()); 732 + } 733 + 734 + Err(LexiconFetcherError::HttpRequestFailed(format!( 735 + "Could not extract NSID from record: {}", 736 + record.uri 737 + ))) 738 + } 739 + 740 + /// Optimize a set of NSIDs by collapsing them into the minimal set of fetch patterns 741 + /// For example: ["app.bsky.actor.foo", "app.bsky.actor.bar"] -> ["app.bsky.actor.*"] 742 + /// This function tries multiple grouping strategies to find the most efficient pattern 743 + pub fn optimize_fetch_patterns(nsids: &std::collections::HashSet<String>) -> Vec<String> { 744 + use std::collections::{BTreeMap, HashSet}; 745 + 746 + if nsids.is_empty() { 747 + return Vec::new(); 748 + } 749 + 750 + // Strategy 1: Try grouping by authority (first 2 segments) 751 + // e.g., ["app.bsky.actor.foo", "app.bsky.feed.bar"] -> ["app.bsky.*"] 752 + let mut authority_groups: BTreeMap<String, Vec<String>> = BTreeMap::new(); 753 + 754 + for nsid in nsids { 755 + let parts: Vec<&str> = nsid.split('.').collect(); 756 + if parts.len() >= 2 { 757 + let authority = format!("{}.{}", parts[0], parts[1]); 758 + authority_groups.entry(authority).or_insert_with(Vec::new).push(nsid.clone()); 759 + } 760 + } 761 + 762 + // Strategy 2: Try grouping by namespace prefix (all but last segment) 763 + // e.g., ["app.bsky.actor.foo", "app.bsky.actor.bar"] -> ["app.bsky.actor.*"] 764 + let mut prefix_groups: BTreeMap<String, Vec<String>> = BTreeMap::new(); 765 + 766 + for nsid in nsids { 767 + let parts: Vec<&str> = nsid.split('.').collect(); 768 + if parts.len() >= 3 { 769 + let prefix = parts[..parts.len() - 1].join("."); 770 + prefix_groups.entry(prefix).or_insert_with(Vec::new).push(nsid.clone()); 771 + } 772 + } 773 + 774 + let mut result = Vec::new(); 775 + let mut handled_nsids = HashSet::new(); 776 + 777 + // First pass: Apply namespace-level grouping (more specific) 778 + for (prefix, group) in &prefix_groups { 779 + if group.len() >= 2 && !handled_nsids.contains(&group[0]) { 780 + result.push(format!("{}.*", prefix)); 781 + for nsid in group { 782 + handled_nsids.insert(nsid.clone()); 783 + } 784 + } 785 + } 786 + 787 + // Second pass: For remaining NSIDs, consider authority-level grouping 788 + // Only use authority wildcard if we have 3+ different namespaces under same authority 789 + for (authority, group) in &authority_groups { 790 + let unhandled: Vec<&String> = group.iter() 791 + .filter(|nsid| !handled_nsids.contains(*nsid)) 792 + .collect(); 793 + 794 + if unhandled.len() >= 3 { 795 + result.push(format!("{}.*", authority)); 796 + for nsid in &unhandled { 797 + handled_nsids.insert((*nsid).clone()); 798 + } 799 + } 800 + } 801 + 802 + // Third pass: Add remaining individual NSIDs 803 + for nsid in nsids { 804 + if !handled_nsids.contains(nsid) { 805 + result.push(nsid.clone()); 806 + } 807 + } 808 + 809 + // Sort for consistent output 810 + result.sort(); 811 + result 812 + } 813 + 814 + #[cfg(test)] 815 + mod tests { 816 + use super::*; 817 + 818 + #[test] 819 + fn test_construct_dns_name() { 820 + assert_eq!( 821 + construct_dns_name("place.stream", "key"), 822 + "_lexicon.key.stream.place" 823 + ); 824 + assert_eq!( 825 + construct_dns_name("app.bsky", "actor"), 826 + "_lexicon.actor.bsky.app" 827 + ); 828 + assert_eq!( 829 + construct_dns_name("app.bsky", "actor.profile"), 830 + "_lexicon.actor.profile.bsky.app" 831 + ); 832 + assert_eq!( 833 + construct_dns_name("place.stream", ""), 834 + "_lexicon.stream.place" 835 + ); 836 + } 837 + 838 + #[test] 839 + fn test_parse_nsid() { 840 + let (auth, name) = parse_nsid("place.stream.key").unwrap(); 841 + assert_eq!(auth, "place.stream"); 842 + assert_eq!(name, "key"); 843 + 844 + let (auth, name) = parse_nsid("app.bsky.actor.profile").unwrap(); 845 + assert_eq!(auth, "app.bsky"); 846 + assert_eq!(name, "actor.profile"); 847 + 848 + let (auth, name) = parse_nsid("place.stream").unwrap(); 849 + assert_eq!(auth, "place.stream"); 850 + assert_eq!(name, ""); 851 + 852 + assert!(parse_nsid("invalid").is_err()); 853 + } 854 + 855 + #[tokio::test] 856 + async fn test_mock_dns_resolver() { 857 + let mut resolver = MockDnsResolver::new(); 858 + resolver.add_record("place.stream", "key", "did:plc:test123".to_string()); 859 + 860 + let did = resolver.resolve_lexicon_did("place.stream", "key").await.unwrap(); 861 + assert_eq!(did, "did:plc:test123"); 862 + 863 + let result = resolver.resolve_lexicon_did("place.stream", "notfound").await; 864 + assert!(result.is_err()); 865 + } 866 + 867 + #[tokio::test] 868 + async fn test_mock_dns_resolver_from_nsid() { 869 + let mut resolver = MockDnsResolver::new(); 870 + resolver 871 + .add_record_from_nsid("app.bsky.actor.profile", "did:plc:bsky123".to_string()) 872 + .unwrap(); 873 + 874 + let did = resolver 875 + .resolve_lexicon_did("app.bsky", "actor.profile") 876 + .await 877 + .unwrap(); 878 + assert_eq!(did, "did:plc:bsky123"); 879 + } 880 + }
+286
mlf-lexicon-fetcher/tests/dns_scenarios.rs
··· 1 + // Comprehensive DNS resolver tests covering various scenarios 2 + 3 + use mlf_lexicon_fetcher::{DnsResolver, MockDnsResolver}; 4 + 5 + #[tokio::test] 6 + async fn test_successful_lookup() { 7 + let mut resolver = MockDnsResolver::new(); 8 + resolver.add_record("place.stream", "key", "did:plc:abc123def456".to_string()); 9 + 10 + let result = resolver.resolve_lexicon_did("place.stream", "key").await; 11 + assert!(result.is_ok()); 12 + assert_eq!(result.unwrap(), "did:plc:abc123def456"); 13 + } 14 + 15 + #[tokio::test] 16 + async fn test_dns_record_not_found() { 17 + let resolver = MockDnsResolver::new(); 18 + let result = resolver.resolve_lexicon_did("nonexistent.domain", "test").await; 19 + assert!(result.is_err()); 20 + assert!(result.unwrap_err().to_string().contains("No mock record found")); 21 + } 22 + 23 + #[tokio::test] 24 + async fn test_multiple_authority_types() { 25 + let mut resolver = MockDnsResolver::new(); 26 + 27 + // app.bsky authority 28 + resolver.add_record("app.bsky", "actor", "did:plc:bsky001".to_string()); 29 + 30 + // place.stream authority 31 + resolver.add_record("place.stream", "chat", "did:plc:stream001".to_string()); 32 + 33 + // com.atproto authority 34 + resolver.add_record("com.atproto", "repo", "did:plc:atproto001".to_string()); 35 + 36 + assert_eq!( 37 + resolver.resolve_lexicon_did("app.bsky", "actor").await.unwrap(), 38 + "did:plc:bsky001" 39 + ); 40 + assert_eq!( 41 + resolver.resolve_lexicon_did("place.stream", "chat").await.unwrap(), 42 + "did:plc:stream001" 43 + ); 44 + assert_eq!( 45 + resolver.resolve_lexicon_did("com.atproto", "repo").await.unwrap(), 46 + "did:plc:atproto001" 47 + ); 48 + } 49 + 50 + #[tokio::test] 51 + async fn test_nested_name_segments() { 52 + let mut resolver = MockDnsResolver::new(); 53 + 54 + // Single segment 55 + resolver.add_record("app.bsky", "actor", "did:plc:single".to_string()); 56 + 57 + // Two segments 58 + resolver.add_record("app.bsky", "actor.profile", "did:plc:double".to_string()); 59 + 60 + // Three segments 61 + resolver.add_record("app.bsky", "actor.profile.detailed", "did:plc:triple".to_string()); 62 + 63 + assert_eq!( 64 + resolver.resolve_lexicon_did("app.bsky", "actor").await.unwrap(), 65 + "did:plc:single" 66 + ); 67 + assert_eq!( 68 + resolver.resolve_lexicon_did("app.bsky", "actor.profile").await.unwrap(), 69 + "did:plc:double" 70 + ); 71 + assert_eq!( 72 + resolver.resolve_lexicon_did("app.bsky", "actor.profile.detailed").await.unwrap(), 73 + "did:plc:triple" 74 + ); 75 + } 76 + 77 + #[tokio::test] 78 + async fn test_empty_name_segments() { 79 + let mut resolver = MockDnsResolver::new(); 80 + 81 + // Authority only (no name segments) 82 + resolver.add_record("place.stream", "", "did:plc:root".to_string()); 83 + 84 + assert_eq!( 85 + resolver.resolve_lexicon_did("place.stream", "").await.unwrap(), 86 + "did:plc:root" 87 + ); 88 + } 89 + 90 + #[tokio::test] 91 + async fn test_did_web_format() { 92 + let mut resolver = MockDnsResolver::new(); 93 + 94 + resolver.add_record("example.com", "api", "did:web:example.com".to_string()); 95 + 96 + assert_eq!( 97 + resolver.resolve_lexicon_did("example.com", "api").await.unwrap(), 98 + "did:web:example.com" 99 + ); 100 + } 101 + 102 + #[tokio::test] 103 + async fn test_did_plc_format() { 104 + let mut resolver = MockDnsResolver::new(); 105 + 106 + // Real-world style PLC DIDs 107 + resolver.add_record( 108 + "app.bsky", 109 + "feed", 110 + "did:plc:z72i7hdynmk6r22z27h6tvur".to_string() 111 + ); 112 + 113 + let did = resolver.resolve_lexicon_did("app.bsky", "feed").await.unwrap(); 114 + assert!(did.starts_with("did:plc:")); 115 + assert_eq!(did.len(), 32); // "did:plc:" (8) + 24 chars 116 + } 117 + 118 + #[tokio::test] 119 + async fn test_add_record_from_nsid() { 120 + let mut resolver = MockDnsResolver::new(); 121 + 122 + // Add using full NSID 123 + resolver.add_record_from_nsid("place.stream.key", "did:plc:test".to_string()).unwrap(); 124 + resolver.add_record_from_nsid("app.bsky.actor.profile", "did:plc:bsky".to_string()).unwrap(); 125 + 126 + assert_eq!( 127 + resolver.resolve_lexicon_did("place.stream", "key").await.unwrap(), 128 + "did:plc:test" 129 + ); 130 + assert_eq!( 131 + resolver.resolve_lexicon_did("app.bsky", "actor.profile").await.unwrap(), 132 + "did:plc:bsky" 133 + ); 134 + } 135 + 136 + #[tokio::test] 137 + async fn test_invalid_nsid_format() { 138 + let mut resolver = MockDnsResolver::new(); 139 + 140 + // NSID with only one segment 141 + let result = resolver.add_record_from_nsid("invalid", "did:plc:test".to_string()); 142 + assert!(result.is_err()); 143 + assert!(result.unwrap_err().to_string().contains("at least 2 segments")); 144 + } 145 + 146 + #[tokio::test] 147 + async fn test_case_sensitivity() { 148 + let mut resolver = MockDnsResolver::new(); 149 + 150 + // ATProto NSIDs are case-sensitive 151 + resolver.add_record("App.Bsky", "Actor", "did:plc:uppercase".to_string()); 152 + resolver.add_record("app.bsky", "actor", "did:plc:lowercase".to_string()); 153 + 154 + // These should be treated as different domains 155 + assert_eq!( 156 + resolver.resolve_lexicon_did("App.Bsky", "Actor").await.unwrap(), 157 + "did:plc:uppercase" 158 + ); 159 + assert_eq!( 160 + resolver.resolve_lexicon_did("app.bsky", "actor").await.unwrap(), 161 + "did:plc:lowercase" 162 + ); 163 + } 164 + 165 + #[tokio::test] 166 + async fn test_concurrent_lookups() { 167 + use std::sync::Arc; 168 + 169 + let mut resolver = MockDnsResolver::new(); 170 + resolver.add_record("app.bsky", "feed", "did:plc:concurrent".to_string()); 171 + 172 + let resolver = Arc::new(resolver); 173 + let mut handles = vec![]; 174 + 175 + // Spawn 10 tasks doing concurrent lookups 176 + for _ in 0..10 { 177 + let resolver_clone = Arc::clone(&resolver); 178 + let handle = tokio::spawn(async move { 179 + resolver_clone.resolve_lexicon_did("app.bsky", "feed").await.unwrap() 180 + }); 181 + handles.push(handle); 182 + } 183 + 184 + // All should succeed 185 + for handle in handles { 186 + assert_eq!(handle.await.unwrap(), "did:plc:concurrent"); 187 + } 188 + } 189 + 190 + #[tokio::test] 191 + async fn test_wildcard_namespace_scenarios() { 192 + let mut resolver = MockDnsResolver::new(); 193 + 194 + // Simulate multiple lexicons under same namespace 195 + resolver.add_record("app.bsky", "actor.defs", "did:plc:bsky".to_string()); 196 + resolver.add_record("app.bsky", "actor.profile", "did:plc:bsky".to_string()); 197 + resolver.add_record("app.bsky", "feed.post", "did:plc:bsky".to_string()); 198 + resolver.add_record("app.bsky", "feed.like", "did:plc:bsky".to_string()); 199 + 200 + // All should resolve to the same DID 201 + assert_eq!( 202 + resolver.resolve_lexicon_did("app.bsky", "actor.defs").await.unwrap(), 203 + "did:plc:bsky" 204 + ); 205 + assert_eq!( 206 + resolver.resolve_lexicon_did("app.bsky", "feed.post").await.unwrap(), 207 + "did:plc:bsky" 208 + ); 209 + } 210 + 211 + #[tokio::test] 212 + async fn test_dns_name_construction() { 213 + use mlf_lexicon_fetcher::construct_dns_name; 214 + 215 + // Test various NSID patterns 216 + assert_eq!( 217 + construct_dns_name("place.stream", "key"), 218 + "_lexicon.key.stream.place" 219 + ); 220 + assert_eq!( 221 + construct_dns_name("app.bsky", "actor"), 222 + "_lexicon.actor.bsky.app" 223 + ); 224 + assert_eq!( 225 + construct_dns_name("com.atproto", "repo.strongRef"), 226 + "_lexicon.repo.strongRef.atproto.com" 227 + ); 228 + } 229 + 230 + #[tokio::test] 231 + async fn test_nsid_parsing() { 232 + use mlf_lexicon_fetcher::parse_nsid; 233 + 234 + // Test various NSID formats 235 + let (auth, name) = parse_nsid("place.stream.key").unwrap(); 236 + assert_eq!(auth, "place.stream"); 237 + assert_eq!(name, "key"); 238 + 239 + let (auth, name) = parse_nsid("app.bsky.actor.profile").unwrap(); 240 + assert_eq!(auth, "app.bsky"); 241 + assert_eq!(name, "actor.profile"); 242 + 243 + let (auth, name) = parse_nsid("com.atproto.repo.strongRef").unwrap(); 244 + assert_eq!(auth, "com.atproto"); 245 + assert_eq!(name, "repo.strongRef"); 246 + } 247 + 248 + #[tokio::test] 249 + async fn test_edge_case_empty_did() { 250 + let mut resolver = MockDnsResolver::new(); 251 + 252 + // Empty DID should work (though not valid in practice) 253 + resolver.add_record("test.com", "api", "".to_string()); 254 + 255 + assert_eq!( 256 + resolver.resolve_lexicon_did("test.com", "api").await.unwrap(), 257 + "" 258 + ); 259 + } 260 + 261 + #[tokio::test] 262 + async fn test_very_long_nsid() { 263 + let mut resolver = MockDnsResolver::new(); 264 + 265 + // Very long name segments 266 + let long_name = "very.long.deeply.nested.namespace.path.to.definition"; 267 + resolver.add_record("app.bsky", long_name, "did:plc:deep".to_string()); 268 + 269 + assert_eq!( 270 + resolver.resolve_lexicon_did("app.bsky", long_name).await.unwrap(), 271 + "did:plc:deep" 272 + ); 273 + } 274 + 275 + #[tokio::test] 276 + async fn test_special_characters_in_nsid() { 277 + let mut resolver = MockDnsResolver::new(); 278 + 279 + // Hyphens and numbers are valid in domain names 280 + resolver.add_record("app-test.bsky-123", "actor", "did:plc:special".to_string()); 281 + 282 + assert_eq!( 283 + resolver.resolve_lexicon_did("app-test.bsky-123", "actor").await.unwrap(), 284 + "did:plc:special" 285 + ); 286 + }
+360
mlf-lexicon-fetcher/tests/lexicon_fetching.rs
··· 1 + // Integration tests for full lexicon fetching flow (DNS + HTTP) 2 + 3 + use mlf_lexicon_fetcher::{ 4 + LexiconFetcher, MockDnsResolver, MockHttpClient, LexiconFetcherError, 5 + }; 6 + use serde_json::json; 7 + 8 + #[tokio::test] 9 + async fn test_fetch_single_lexicon() { 10 + // Setup mock DNS resolver 11 + let mut dns_resolver = MockDnsResolver::new(); 12 + dns_resolver.add_record("place.stream", "chat.profile", "did:plc:test123".to_string()); 13 + 14 + // Setup mock HTTP client 15 + let mut http_client = MockHttpClient::new(); 16 + let lexicon_json = json!({ 17 + "lexicon": 1, 18 + "id": "place.stream.chat.profile", 19 + "defs": { 20 + "main": { 21 + "type": "record", 22 + "description": "A chat profile record" 23 + } 24 + } 25 + }); 26 + http_client.add_lexicon("place.stream.chat.profile".to_string(), lexicon_json.clone()); 27 + 28 + // Create fetcher 29 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 30 + 31 + // Fetch single lexicon 32 + let result = fetcher.fetch("place.stream.chat.profile").await; 33 + assert!(result.is_ok()); 34 + let fetched = result.unwrap(); 35 + assert_eq!(fetched.get("id").unwrap().as_str().unwrap(), "place.stream.chat.profile"); 36 + } 37 + 38 + #[tokio::test] 39 + async fn test_fetch_pattern_multiple_lexicons() { 40 + // Setup mock DNS resolver 41 + let mut dns_resolver = MockDnsResolver::new(); 42 + dns_resolver.add_record("place.stream", "chat", "did:plc:test123".to_string()); 43 + 44 + // Setup mock HTTP client with multiple lexicons 45 + let mut http_client = MockHttpClient::new(); 46 + 47 + let profile_lexicon = json!({ 48 + "lexicon": 1, 49 + "id": "place.stream.chat.profile", 50 + "defs": { "main": { "type": "record" } } 51 + }); 52 + let message_lexicon = json!({ 53 + "lexicon": 1, 54 + "id": "place.stream.chat.message", 55 + "defs": { "main": { "type": "record" } } 56 + }); 57 + let room_lexicon = json!({ 58 + "lexicon": 1, 59 + "id": "place.stream.chat.room", 60 + "defs": { "main": { "type": "record" } } 61 + }); 62 + 63 + http_client.add_lexicon("place.stream.chat.profile".to_string(), profile_lexicon); 64 + http_client.add_lexicon("place.stream.chat.message".to_string(), message_lexicon); 65 + http_client.add_lexicon("place.stream.chat.room".to_string(), room_lexicon); 66 + 67 + // Create fetcher 68 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 69 + 70 + // Fetch pattern 71 + let result = fetcher.fetch_pattern("place.stream.chat.*").await; 72 + assert!(result.is_ok()); 73 + let lexicons = result.unwrap(); 74 + 75 + // Should get 3 lexicons 76 + assert_eq!(lexicons.len(), 3); 77 + 78 + // Check all NSIDs are present 79 + let nsids: Vec<&str> = lexicons.iter().map(|(nsid, _)| nsid.as_str()).collect(); 80 + assert!(nsids.contains(&"place.stream.chat.profile")); 81 + assert!(nsids.contains(&"place.stream.chat.message")); 82 + assert!(nsids.contains(&"place.stream.chat.room")); 83 + } 84 + 85 + #[tokio::test] 86 + async fn test_fetch_lexicon_not_found() { 87 + // Setup mock DNS resolver 88 + let mut dns_resolver = MockDnsResolver::new(); 89 + dns_resolver.add_record("place.stream", "chat.profile", "did:plc:test123".to_string()); 90 + 91 + // Setup mock HTTP client (but don't add the lexicon) 92 + let http_client = MockHttpClient::new(); 93 + 94 + // Create fetcher 95 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 96 + 97 + // Try to fetch non-existent lexicon 98 + let result = fetcher.fetch("place.stream.chat.profile").await; 99 + assert!(result.is_err()); 100 + 101 + match result { 102 + Err(LexiconFetcherError::LexiconNotFound(nsid)) => { 103 + assert_eq!(nsid, "place.stream.chat.profile"); 104 + } 105 + _ => panic!("Expected LexiconNotFound error"), 106 + } 107 + } 108 + 109 + #[tokio::test] 110 + async fn test_fetch_dns_lookup_failed() { 111 + // Setup mock DNS resolver (but don't add the record) 112 + let dns_resolver = MockDnsResolver::new(); 113 + 114 + // Setup mock HTTP client 115 + let http_client = MockHttpClient::new(); 116 + 117 + // Create fetcher 118 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 119 + 120 + // Try to fetch with no DNS record 121 + let result = fetcher.fetch("place.stream.chat.profile").await; 122 + assert!(result.is_err()); 123 + 124 + match result { 125 + Err(LexiconFetcherError::LookupFailed { domain, .. }) => { 126 + assert_eq!(domain, "_lexicon.chat.profile.stream.place"); 127 + } 128 + _ => panic!("Expected LookupFailed error"), 129 + } 130 + } 131 + 132 + #[tokio::test] 133 + async fn test_fetch_with_wildcard_returns_error() { 134 + let dns_resolver = MockDnsResolver::new(); 135 + let http_client = MockHttpClient::new(); 136 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 137 + 138 + // Try to use wildcard with fetch() instead of fetch_pattern() 139 + let result = fetcher.fetch("place.stream.*").await; 140 + assert!(result.is_err()); 141 + 142 + match result { 143 + Err(LexiconFetcherError::InvalidNsid(msg)) => { 144 + assert!(msg.contains("fetch_pattern()")); 145 + } 146 + _ => panic!("Expected InvalidNsid error"), 147 + } 148 + } 149 + 150 + #[tokio::test] 151 + async fn test_fetch_pattern_empty_results() { 152 + // Setup mock DNS resolver 153 + let mut dns_resolver = MockDnsResolver::new(); 154 + dns_resolver.add_record("place.stream", "chat", "did:plc:test123".to_string()); 155 + 156 + // Setup mock HTTP client with no matching lexicons 157 + let mut http_client = MockHttpClient::new(); 158 + http_client.add_lexicon("place.stream.other.thing".to_string(), json!({})); 159 + 160 + // Create fetcher 161 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 162 + 163 + // Fetch pattern that matches nothing 164 + let result = fetcher.fetch_pattern("place.stream.chat.*").await; 165 + assert!(result.is_ok()); 166 + let lexicons = result.unwrap(); 167 + assert_eq!(lexicons.len(), 0); 168 + } 169 + 170 + #[tokio::test] 171 + async fn test_multiple_authorities() { 172 + // Setup mock DNS resolver with multiple authorities 173 + let mut dns_resolver = MockDnsResolver::new(); 174 + dns_resolver.add_record("place.stream", "chat.profile", "did:plc:stream123".to_string()); 175 + dns_resolver.add_record("app.bsky", "actor.profile", "did:plc:bsky456".to_string()); 176 + 177 + // Setup mock HTTP client 178 + let mut http_client = MockHttpClient::new(); 179 + http_client.add_lexicon( 180 + "place.stream.chat.profile".to_string(), 181 + json!({"id": "place.stream.chat.profile"}), 182 + ); 183 + http_client.add_lexicon( 184 + "app.bsky.actor.profile".to_string(), 185 + json!({"id": "app.bsky.actor.profile"}), 186 + ); 187 + 188 + // Create fetcher 189 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 190 + 191 + // Fetch from both authorities 192 + let result1 = fetcher.fetch("place.stream.chat.profile").await; 193 + assert!(result1.is_ok()); 194 + 195 + let result2 = fetcher.fetch("app.bsky.actor.profile").await; 196 + assert!(result2.is_ok()); 197 + } 198 + 199 + #[tokio::test] 200 + async fn test_nested_name_segments() { 201 + // Setup mock DNS resolver 202 + let mut dns_resolver = MockDnsResolver::new(); 203 + dns_resolver.add_record( 204 + "place.stream", 205 + "chat.message.attachments.image", 206 + "did:plc:test123".to_string(), 207 + ); 208 + 209 + // Setup mock HTTP client 210 + let mut http_client = MockHttpClient::new(); 211 + http_client.add_lexicon( 212 + "place.stream.chat.message.attachments.image".to_string(), 213 + json!({"id": "place.stream.chat.message.attachments.image"}), 214 + ); 215 + 216 + // Create fetcher 217 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 218 + 219 + // Fetch deeply nested lexicon 220 + let result = fetcher.fetch("place.stream.chat.message.attachments.image").await; 221 + assert!(result.is_ok()); 222 + } 223 + 224 + #[tokio::test] 225 + async fn test_concurrent_fetches() { 226 + use std::sync::Arc; 227 + use tokio::task; 228 + 229 + // Setup mock DNS resolver 230 + let mut dns_resolver = MockDnsResolver::new(); 231 + dns_resolver.add_record("place.stream", "chat.profile", "did:plc:test123".to_string()); 232 + 233 + // Setup mock HTTP client 234 + let mut http_client = MockHttpClient::new(); 235 + http_client.add_lexicon( 236 + "place.stream.chat.profile".to_string(), 237 + json!({"id": "place.stream.chat.profile"}), 238 + ); 239 + 240 + // Create fetcher and wrap in Arc 241 + let fetcher = Arc::new(LexiconFetcher::new(dns_resolver, http_client)); 242 + 243 + // Spawn multiple concurrent fetch tasks 244 + let mut handles = vec![]; 245 + for _ in 0..10 { 246 + let fetcher_clone = Arc::clone(&fetcher); 247 + let handle = task::spawn(async move { 248 + fetcher_clone.fetch("place.stream.chat.profile").await 249 + }); 250 + handles.push(handle); 251 + } 252 + 253 + // All should succeed 254 + for handle in handles { 255 + let result = handle.await.unwrap(); 256 + assert!(result.is_ok()); 257 + } 258 + } 259 + 260 + #[tokio::test] 261 + async fn test_pattern_prefix_matching() { 262 + // Setup mock DNS resolver 263 + let mut dns_resolver = MockDnsResolver::new(); 264 + dns_resolver.add_record("app.bsky", "feed", "did:plc:test123".to_string()); 265 + 266 + // Setup mock HTTP client 267 + let mut http_client = MockHttpClient::new(); 268 + http_client.add_lexicon("app.bsky.feed.post".to_string(), json!({"id": "app.bsky.feed.post"})); 269 + http_client.add_lexicon("app.bsky.feed.like".to_string(), json!({"id": "app.bsky.feed.like"})); 270 + http_client.add_lexicon("app.bsky.feed.repost".to_string(), json!({"id": "app.bsky.feed.repost"})); 271 + http_client.add_lexicon("app.bsky.actor.profile".to_string(), json!({"id": "app.bsky.actor.profile"})); 272 + 273 + // Create fetcher 274 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 275 + 276 + // Fetch only feed.* lexicons 277 + let result = fetcher.fetch_pattern("app.bsky.feed.*").await; 278 + assert!(result.is_ok()); 279 + let lexicons = result.unwrap(); 280 + 281 + // Should only get feed.* lexicons (not actor.profile) 282 + assert_eq!(lexicons.len(), 3); 283 + for (nsid, _) in lexicons { 284 + assert!(nsid.starts_with("app.bsky.feed.")); 285 + } 286 + } 287 + 288 + #[tokio::test] 289 + async fn test_underscore_wildcard_direct_children() { 290 + // Setup mock DNS resolver 291 + let mut dns_resolver = MockDnsResolver::new(); 292 + dns_resolver.add_record("place.stream", "", "did:plc:test123".to_string()); 293 + 294 + // Setup mock HTTP client with nested lexicons 295 + let mut http_client = MockHttpClient::new(); 296 + 297 + // Direct children of place.stream 298 + http_client.add_lexicon("place.stream.chat".to_string(), json!({"id": "place.stream.chat"})); 299 + http_client.add_lexicon("place.stream.key".to_string(), json!({"id": "place.stream.key"})); 300 + http_client.add_lexicon("place.stream.livestream".to_string(), json!({"id": "place.stream.livestream"})); 301 + 302 + // Nested children (should NOT match with _) 303 + http_client.add_lexicon("place.stream.chat.profile".to_string(), json!({"id": "place.stream.chat.profile"})); 304 + http_client.add_lexicon("place.stream.chat.message".to_string(), json!({"id": "place.stream.chat.message"})); 305 + http_client.add_lexicon("place.stream.key.defs".to_string(), json!({"id": "place.stream.key.defs"})); 306 + 307 + // Create fetcher 308 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 309 + 310 + // Test underscore wildcard (direct children only) 311 + let result = fetcher.fetch_pattern("place.stream._").await; 312 + assert!(result.is_ok()); 313 + let lexicons = result.unwrap(); 314 + 315 + // Should only get 3 direct children 316 + assert_eq!(lexicons.len(), 3); 317 + 318 + let nsids: Vec<&str> = lexicons.iter().map(|(nsid, _)| nsid.as_str()).collect(); 319 + assert!(nsids.contains(&"place.stream.chat")); 320 + assert!(nsids.contains(&"place.stream.key")); 321 + assert!(nsids.contains(&"place.stream.livestream")); 322 + 323 + // Should NOT contain nested children 324 + assert!(!nsids.contains(&"place.stream.chat.profile")); 325 + assert!(!nsids.contains(&"place.stream.chat.message")); 326 + assert!(!nsids.contains(&"place.stream.key.defs")); 327 + } 328 + 329 + #[tokio::test] 330 + async fn test_star_vs_underscore_wildcard() { 331 + // Setup mock DNS resolver 332 + let mut dns_resolver = MockDnsResolver::new(); 333 + dns_resolver.add_record("place.stream", "", "did:plc:test123".to_string()); 334 + 335 + // Setup mock HTTP client with nested lexicons 336 + let mut http_client = MockHttpClient::new(); 337 + http_client.add_lexicon("place.stream.chat".to_string(), json!({"id": "place.stream.chat"})); 338 + http_client.add_lexicon("place.stream.chat.profile".to_string(), json!({"id": "place.stream.chat.profile"})); 339 + http_client.add_lexicon("place.stream.key".to_string(), json!({"id": "place.stream.key"})); 340 + 341 + // Create fetcher 342 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 343 + 344 + // Test star wildcard (all descendants) 345 + let star_result = fetcher.fetch_pattern("place.stream.*").await; 346 + assert!(star_result.is_ok()); 347 + let star_lexicons = star_result.unwrap(); 348 + assert_eq!(star_lexicons.len(), 3); // All 3 lexicons 349 + 350 + // Test underscore wildcard (direct children only) 351 + let underscore_result = fetcher.fetch_pattern("place.stream._").await; 352 + assert!(underscore_result.is_ok()); 353 + let underscore_lexicons = underscore_result.unwrap(); 354 + assert_eq!(underscore_lexicons.len(), 2); // Only direct children (chat, key) 355 + 356 + let nsids: Vec<&str> = underscore_lexicons.iter().map(|(nsid, _)| nsid.as_str()).collect(); 357 + assert!(nsids.contains(&"place.stream.chat")); 358 + assert!(nsids.contains(&"place.stream.key")); 359 + assert!(!nsids.contains(&"place.stream.chat.profile")); 360 + }
+6
tests/Cargo.toml
··· 12 12 mlf-lang = { path = "../mlf-lang" } 13 13 mlf-codegen = { path = "../mlf-codegen" } 14 14 mlf-diagnostics = { path = "../mlf-diagnostics" } 15 + mlf-lexicon-fetcher = { path = "../mlf-lexicon-fetcher" } 15 16 serde_json = "1.0" 16 17 serde = { version = "1.0", features = ["derive"] } 17 18 toml = "0.8" 19 + tokio = { version = "1", features = ["full"] } 18 20 19 21 [dev-dependencies] 20 22 # Any additional test dependencies ··· 26 28 [[test]] 27 29 name = "diagnostics_integration" 28 30 path = "diagnostics_integration.rs" 31 + 32 + [[test]] 33 + name = "lexicon_fetcher_integration" 34 + path = "lexicon_fetcher_integration.rs"
+1 -1
tests/codegen_integration.rs
··· 27 27 }) 28 28 .collect(); 29 29 30 - let (passed, failed) = test_utils::run_and_report_tests::<fn(&str) -> Result<(), String>>(tests, "Codegen"); 30 + let (_passed, failed) = test_utils::run_and_report_tests::<fn(&str) -> Result<(), String>>(tests, "Codegen"); 31 31 32 32 if !failed.is_empty() { 33 33 panic!(
+7 -4
tests/diagnostics_integration.rs
··· 5 5 use mlf_integration_tests::test_utils; 6 6 use mlf_lang::{parser::parse_lexicon, Workspace}; 7 7 use serde::Deserialize; 8 - use serde_json::Value; 9 8 use std::fs; 10 9 use std::path::Path; 11 10 ··· 17 16 18 17 #[derive(Debug, Deserialize)] 19 18 struct ExpectedError { 19 + #[allow(dead_code)] 20 20 code: String, 21 21 message: String, 22 22 #[serde(default)] 23 + #[allow(dead_code)] 23 24 span: Option<ExpectedSpan>, 24 25 } 25 26 26 27 #[derive(Debug, Deserialize)] 27 28 struct ExpectedSpan { 29 + #[allow(dead_code)] 28 30 start: usize, 31 + #[allow(dead_code)] 29 32 end: usize, 30 33 } 31 34 ··· 47 50 }) 48 51 .collect(); 49 52 50 - let (passed, failed) = test_utils::run_and_report_tests::<fn(&str) -> Result<(), String>>(tests, "Diagnostics"); 53 + let (_passed, failed) = test_utils::run_and_report_tests::<fn(&str) -> Result<(), String>>(tests, "Diagnostics"); 51 54 52 55 if !failed.is_empty() { 53 56 panic!( ··· 96 99 }; 97 100 98 101 // 5. Create diagnostic 99 - let diagnostic = ValidationDiagnostic::new( 102 + let _diagnostic = ValidationDiagnostic::new( 100 103 "input.mlf".to_string(), 101 104 input.clone(), 102 105 namespace.clone(), ··· 135 138 let actual_error = errors_in_module[i]; 136 139 137 140 // Check error code 138 - let actual_code = mlf_diagnostics::get_error_module_namespace_str(actual_error); 141 + let _actual_code = mlf_diagnostics::get_error_module_namespace_str(actual_error); 139 142 140 143 // Format the error message 141 144 let actual_message = format!("{:?}", actual_error);
+4
tests/lexicon_fetcher/dns/basic_resolution/expected.json
··· 1 + { 2 + "status": "success", 3 + "did": "did:plc:test123" 4 + }
+4
tests/lexicon_fetcher/dns/basic_resolution/test.toml
··· 1 + [test] 2 + description = "Basic DNS resolution for place.stream.chat.profile" 3 + nsid = "place.stream.chat.profile" 4 + did = "did:plc:test123"
+4
tests/lexicon_fetcher/dns/missing_record/expected.json
··· 1 + { 2 + "status": "error", 3 + "error": "lookup_failed" 4 + }
+4
tests/lexicon_fetcher/dns/missing_record/test.toml
··· 1 + [test] 2 + description = "DNS lookup for non-existent record" 3 + nsid = "nonexistent.domain.test" 4 + should_fail = true
+4
tests/lexicon_fetcher/dns/nested_segments/expected.json
··· 1 + { 2 + "status": "success", 3 + "did": "did:plc:nested789" 4 + }
+4
tests/lexicon_fetcher/dns/nested_segments/test.toml
··· 1 + [test] 2 + description = "DNS resolution for deeply nested NSID app.bsky.actor.profile.detailed" 3 + nsid = "app.bsky.actor.profile.detailed" 4 + did = "did:plc:nested789"
+4
tests/lexicon_fetcher/dns/wildcard_pattern/expected.json
··· 1 + { 2 + "status": "success", 3 + "did": "did:plc:test456" 4 + }
+4
tests/lexicon_fetcher/dns/wildcard_pattern/test.toml
··· 1 + [test] 2 + description = "DNS resolution for wildcard pattern place.stream.chat.*" 3 + nsid = "place.stream.chat.*" 4 + did = "did:plc:test456"
+220
tests/lexicon_fetcher_integration.rs
··· 1 + // Workspace-level integration tests for lexicon fetcher 2 + // Tests mlf-lexicon-fetcher working with mocks 3 + 4 + use mlf_lexicon_fetcher::{DnsResolver, LexiconFetcher, MockDnsResolver, MockHttpClient}; 5 + use serde::{Deserialize, Serialize}; 6 + use std::fs; 7 + use std::path::Path; 8 + 9 + #[derive(Debug, Deserialize)] 10 + struct TestConfig { 11 + test: TestMetadata, 12 + } 13 + 14 + #[derive(Debug, Deserialize)] 15 + struct TestMetadata { 16 + description: String, 17 + nsid: String, 18 + #[serde(default)] 19 + did: Option<String>, 20 + #[serde(default)] 21 + should_fail: bool, 22 + } 23 + 24 + #[derive(Debug, Deserialize, Serialize, PartialEq)] 25 + struct ExpectedResult { 26 + status: String, 27 + #[serde(skip_serializing_if = "Option::is_none")] 28 + did: Option<String>, 29 + #[serde(skip_serializing_if = "Option::is_none")] 30 + error: Option<String>, 31 + } 32 + 33 + #[tokio::test] 34 + async fn lexicon_fetcher_dns_tests() { 35 + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); 36 + let test_base = format!("{}/lexicon_fetcher/dns", manifest_dir); 37 + let test_dirs = discover_test_dirs(&test_base); 38 + 39 + let mut tests = Vec::new(); 40 + 41 + for test_dir in test_dirs { 42 + let test_name = format!( 43 + "lexicon_fetcher/dns/{}", 44 + Path::new(&test_dir).file_name().unwrap().to_str().unwrap() 45 + ); 46 + let result = run_dns_test(&test_dir).await; 47 + tests.push((test_name, result)); 48 + } 49 + 50 + let (passed, failed) = run_and_report_tests(tests, "Lexicon Fetcher DNS"); 51 + 52 + if !failed.is_empty() { 53 + panic!( 54 + "\nFailed tests:\n{}", 55 + failed 56 + .iter() 57 + .map(|(name, err)| format!(" - {}: {}", name, err)) 58 + .collect::<Vec<_>>() 59 + .join("\n") 60 + ); 61 + } 62 + 63 + assert!(passed > 0, "No tests were run"); 64 + } 65 + 66 + async fn run_dns_test(test_dir: &str) -> Result<(), String> { 67 + // Load test configuration 68 + let config_path = format!("{}/test.toml", test_dir); 69 + let config_str = fs::read_to_string(&config_path) 70 + .map_err(|e| format!("Failed to read test.toml: {}", e))?; 71 + let config: TestConfig = toml::from_str(&config_str) 72 + .map_err(|e| format!("Failed to parse test.toml: {}", e))?; 73 + 74 + // Load expected output 75 + let expected_path = format!("{}/expected.json", test_dir); 76 + let expected_str = fs::read_to_string(&expected_path) 77 + .map_err(|e| format!("Failed to read expected.json: {}", e))?; 78 + let expected: ExpectedResult = serde_json::from_str(&expected_str) 79 + .map_err(|e| format!("Failed to parse expected.json: {}", e))?; 80 + 81 + // Setup mocks 82 + let mut dns_resolver = MockDnsResolver::new(); 83 + let http_client = MockHttpClient::new(); 84 + 85 + // Add DNS record if expected to succeed 86 + if expected.status == "success" { 87 + let (authority, name_segments) = parse_nsid(&config.test.nsid)?; 88 + let did = config.test.did.clone().ok_or_else(|| "Missing DID in test config".to_string())?; 89 + 90 + // For wildcard patterns, remove the .* suffix 91 + let dns_name = if name_segments.ends_with(".*") { 92 + name_segments.strip_suffix(".*").unwrap() 93 + } else { 94 + &name_segments 95 + }; 96 + 97 + dns_resolver.add_record(&authority, dns_name, did.clone()); 98 + } 99 + 100 + // Create fetcher 101 + let fetcher = LexiconFetcher::new(dns_resolver, http_client); 102 + 103 + // Test DNS resolution by attempting to extract the DID 104 + // We can't directly test DNS resolution without HTTP, so we test the full flow 105 + // but expect it to fail at HTTP stage (which is fine for DNS testing) 106 + let (authority, name_segments) = parse_nsid(&config.test.nsid)?; 107 + 108 + // For wildcard patterns, strip the .* suffix 109 + let dns_name = if name_segments.ends_with(".*") { 110 + name_segments.strip_suffix(".*").unwrap().to_string() 111 + } else { 112 + name_segments 113 + }; 114 + 115 + // Create a test resolver directly to check DNS 116 + let mut test_dns = MockDnsResolver::new(); 117 + if expected.status == "success" { 118 + let did = config.test.did.clone().ok_or_else(|| "Missing DID in test config".to_string())?; 119 + test_dns.add_record(&authority, &dns_name, did.clone()); 120 + 121 + // Verify DNS resolution 122 + match test_dns.resolve_lexicon_did(&authority, &dns_name).await { 123 + Ok(resolved_did) => { 124 + if resolved_did != did { 125 + return Err(format!("DID mismatch: expected {}, got {}", did, resolved_did)); 126 + } 127 + // Check against expected 128 + if let Some(expected_did) = &expected.did { 129 + if &resolved_did != expected_did { 130 + return Err(format!("DID mismatch with expected: expected {}, got {}", expected_did, resolved_did)); 131 + } 132 + } 133 + } 134 + Err(e) => return Err(format!("DNS resolution failed: {:?}", e)), 135 + } 136 + } else { 137 + // Expected to fail 138 + match test_dns.resolve_lexicon_did(&authority, &dns_name).await { 139 + Ok(_) => return Err("Expected DNS lookup to fail, but it succeeded".to_string()), 140 + Err(_) => { 141 + // Success - it failed as expected 142 + } 143 + } 144 + } 145 + 146 + Ok(()) 147 + } 148 + 149 + fn parse_nsid(nsid: &str) -> Result<(String, String), String> { 150 + // Remove wildcard if present for parsing 151 + let nsid_base = nsid.strip_suffix(".*").unwrap_or(nsid); 152 + 153 + let parts: Vec<&str> = nsid_base.split('.').collect(); 154 + if parts.len() < 2 { 155 + return Err(format!("Invalid NSID: {}", nsid)); 156 + } 157 + 158 + let authority = format!("{}.{}", parts[0], parts[1]); 159 + let name_segments = if parts.len() > 2 { 160 + let mut segments = parts[2..].join("."); 161 + // Re-add wildcard if original had it 162 + if nsid.ends_with(".*") { 163 + segments.push_str(".*"); 164 + } 165 + segments 166 + } else if nsid.ends_with(".*") { 167 + ".*".to_string() 168 + } else { 169 + String::new() 170 + }; 171 + 172 + Ok((authority, name_segments)) 173 + } 174 + 175 + fn discover_test_dirs(base: &str) -> Vec<String> { 176 + let base_path = Path::new(base); 177 + if !base_path.exists() { 178 + return vec![]; 179 + } 180 + 181 + let mut dirs: Vec<String> = fs::read_dir(base_path) 182 + .unwrap() 183 + .filter_map(|entry| { 184 + let entry = entry.ok()?; 185 + let path = entry.path(); 186 + if path.is_dir() { 187 + Some(path.to_str()?.to_string()) 188 + } else { 189 + None 190 + } 191 + }) 192 + .collect(); 193 + 194 + dirs.sort(); 195 + dirs 196 + } 197 + 198 + fn run_and_report_tests( 199 + tests: Vec<(String, Result<(), String>)>, 200 + test_type: &str, 201 + ) -> (usize, Vec<(String, String)>) { 202 + let mut passed = 0; 203 + let mut failed = Vec::new(); 204 + 205 + for (test_name, result) in tests { 206 + match result { 207 + Ok(()) => { 208 + println!("✓ {}", test_name); 209 + passed += 1; 210 + } 211 + Err(err) => { 212 + println!("✗ {}: {}", test_name, err); 213 + failed.push((test_name, err)); 214 + } 215 + } 216 + } 217 + 218 + println!("\n{} Results: {} passed, {} failed", test_type, passed, failed.len()); 219 + (passed, failed) 220 + }