tangled
alpha
login
or
join now
vmx-atproto-dev.bsky.social
/
mlf
forked from
stavola.xyz/mlf
0
fork
atom
A human-friendly DSL for ATProto Lexicons
0
fork
atom
overview
issues
pulls
pipelines
Factor out mlf-lexicon-fetcher
stavola.xyz
5 months ago
bcdc2ce5
b51fde69
+2028
-463
24 changed files
expand all
collapse all
unified
split
Cargo.lock
Cargo.toml
justfile
mlf-cli
Cargo.toml
src
fetch.rs
main.rs
mlf-lang
tests
integration_test.rs
mlf-lexicon-fetcher
Cargo.toml
examples
usage.rs
src
lib.rs
tests
dns_scenarios.rs
lexicon_fetching.rs
tests
Cargo.toml
codegen_integration.rs
diagnostics_integration.rs
lexicon_fetcher
dns
basic_resolution
expected.json
test.toml
missing_record
expected.json
test.toml
nested_segments
expected.json
test.toml
wildcard_pattern
expected.json
test.toml
lexicon_fetcher_integration.rs
+17
-3
Cargo.lock
···
1290
1290
"chrono",
1291
1291
"clap",
1292
1292
"glob",
1293
1293
-
"hickory-resolver",
1294
1293
"miette",
1295
1294
"mlf-codegen",
1296
1295
"mlf-codegen-go",
···
1298
1297
"mlf-codegen-typescript",
1299
1298
"mlf-diagnostics",
1300
1299
"mlf-lang",
1300
1300
+
"mlf-lexicon-fetcher",
1301
1301
"mlf-validation",
1302
1302
"reqwest",
1303
1303
"serde",
1304
1304
"serde_json",
1305
1305
"sha2",
1306
1306
"thiserror 2.0.17",
1307
1307
+
"tokio",
1307
1308
"toml",
1308
1309
]
1309
1310
···
1358
1359
"mlf-codegen",
1359
1360
"mlf-diagnostics",
1360
1361
"mlf-lang",
1362
1362
+
"mlf-lexicon-fetcher",
1361
1363
"serde",
1362
1364
"serde_json",
1365
1365
+
"tokio",
1363
1366
"toml",
1364
1367
]
1365
1368
···
1373
1376
"serde",
1374
1377
"serde_json",
1375
1378
"toml",
1379
1379
+
]
1380
1380
+
1381
1381
+
[[package]]
1382
1382
+
name = "mlf-lexicon-fetcher"
1383
1383
+
version = "0.1.0"
1384
1384
+
dependencies = [
1385
1385
+
"async-trait",
1386
1386
+
"hickory-resolver",
1387
1387
+
"reqwest",
1388
1388
+
"serde",
1389
1389
+
"serde_json",
1390
1390
+
"thiserror 2.0.17",
1391
1391
+
"tokio",
1376
1392
]
1377
1393
1378
1394
[[package]]
···
1801
1817
"base64",
1802
1818
"bytes",
1803
1819
"encoding_rs",
1804
1804
-
"futures-channel",
1805
1820
"futures-core",
1806
1806
-
"futures-util",
1807
1821
"h2",
1808
1822
"http",
1809
1823
"http-body",
+1
Cargo.toml
···
7
7
"mlf-cli",
8
8
"mlf-codegen",
9
9
"mlf-diagnostics",
10
10
+
"mlf-lexicon-fetcher",
10
11
"mlf-lang",
11
12
"mlf-lsp",
12
13
"mlf-validation", "mlf-wasm",
+12
-6
justfile
···
5
5
default: test
6
6
7
7
# Run all tests (excluding problematic packages)
8
8
-
test: test-lang test-codegen test-diagnostics test-validation
8
8
+
test: test-lang test-codegen test-diagnostics test-lexicon-fetcher test-validation
9
9
10
10
# Run only language tests (mlf-lang crate)
11
11
test-lang:
···
21
21
test-diagnostics:
22
22
@echo "\nRunning diagnostics integration tests..."
23
23
cargo test -p mlf-integration-tests --test diagnostics_integration -- --nocapture
24
24
+
25
25
+
# Run lexicon fetcher tests
26
26
+
test-lexicon-fetcher:
27
27
+
@echo "\nRunning lexicon fetcher tests..."
28
28
+
cargo test -p mlf-lexicon-fetcher -- --nocapture
24
29
25
30
# Run validation tests
26
31
test-validation:
···
78
83
# Show test statistics
79
84
test-stats:
80
85
@echo "Test Statistics:"
81
81
-
@echo " Lang tests: 21 tests in mlf-lang/tests/lang/"
82
82
-
@echo " Codegen tests: 10 tests in tests/codegen/lexicon/"
83
83
-
@echo " Diagnostics tests: 1 test in tests/diagnostics/"
84
84
-
@echo " Validation tests: 12 tests in mlf-validation"
86
86
+
@echo " Lang tests: 21 tests in mlf-lang/tests/lang/"
87
87
+
@echo " Codegen tests: 10 tests in tests/codegen/lexicon/"
88
88
+
@echo " Diagnostics tests: 1 test in tests/diagnostics/"
89
89
+
@echo " Lexicon fetcher tests: 33 tests in mlf-lexicon-fetcher/"
90
90
+
@echo " Validation tests: 12 tests in mlf-validation"
85
91
@echo ""
86
86
-
@echo "Total integration tests: 44"
92
92
+
@echo "Total integration tests: 77"
87
93
88
94
# List all test directories
89
95
test-list:
+3
-2
mlf-cli/Cargo.toml
···
13
13
mlf-validation = { path = "../mlf-validation" }
14
14
mlf-codegen = { path = "../mlf-codegen" }
15
15
mlf-diagnostics = { path = "../mlf-diagnostics" }
16
16
+
mlf-lexicon-fetcher = { path = "../mlf-lexicon-fetcher" }
16
17
clap = { version = "4.5.48", features = ["derive"] }
17
18
miette = { version = "7", features = ["fancy"] }
18
19
thiserror = "2"
···
20
21
serde_json = "1"
21
22
glob = "0.3"
22
23
toml = "0.8"
23
23
-
reqwest = { version = "0.12", features = ["blocking", "json"] }
24
24
+
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
25
25
+
reqwest = { version = "0.12", features = ["json"] }
24
26
chrono = { version = "0.4", features = ["serde"] }
25
25
-
hickory-resolver = "0.24"
26
27
sha2 = "0.10"
27
28
28
29
# Optional code generator plugins
+107
-445
mlf-cli/src/fetch.rs
···
1
1
use crate::config::{find_project_root, get_mlf_cache_dir, init_mlf_cache, ConfigError, MlfConfig, LockFile};
2
2
-
use hickory_resolver::config::*;
3
3
-
use hickory_resolver::Resolver;
2
2
+
use mlf_lexicon_fetcher::{optimize_fetch_patterns, ProductionLexiconFetcher};
4
3
use miette::Diagnostic;
5
5
-
use serde::Deserialize;
6
4
use sha2::{Digest, Sha256};
7
5
use std::collections::HashSet;
8
6
use thiserror::Error;
···
17
15
#[diagnostic(code(mlf::fetch::init_failed))]
18
16
InitFailed(#[source] std::io::Error),
19
17
20
20
-
#[error("DNS lookup failed: {0}")]
21
21
-
#[diagnostic(code(mlf::fetch::dns_error))]
22
22
-
DnsError(String),
23
23
-
24
24
-
#[error("Failed to parse DID from TXT record: {0}")]
25
25
-
#[diagnostic(code(mlf::fetch::did_parse_error))]
26
26
-
DidParseError(String),
27
27
-
28
18
#[error("Failed to fetch lexicon from ATProto repo: {0}")]
29
19
#[diagnostic(code(mlf::fetch::http_error))]
30
20
HttpError(String),
···
47
37
}
48
38
49
39
50
50
-
#[derive(Debug, Deserialize)]
51
51
-
struct AtProtoRecord {
52
52
-
uri: String,
53
53
-
value: serde_json::Value,
54
54
-
}
55
40
56
41
/// Main entry point for fetch command
57
57
-
pub fn run_fetch(nsid: Option<String>, save: bool, update: bool, locked: bool) -> Result<(), FetchError> {
42
42
+
pub async fn run_fetch(nsid: Option<String>, save: bool, update: bool, locked: bool) -> Result<(), FetchError> {
58
43
// Validate flags
59
44
if update && locked {
60
45
return Err(FetchError::HttpError(
···
76
61
let config_path = project_root.join("mlf.toml");
77
62
let config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?;
78
63
79
79
-
fetch_lexicon_with_lock(&namespace, &project_root, &mut lockfile)?;
64
64
+
fetch_lexicon_with_lock(&namespace, &project_root, &mut lockfile).await?;
80
65
81
66
// Handle transitive dependencies if enabled
82
67
if config.dependencies.allow_transitive_deps {
···
85
70
&project_root,
86
71
&mut lockfile,
87
72
config.dependencies.optimize_transitive_fetches
88
88
-
)?;
73
73
+
).await?;
89
74
}
90
75
91
76
// Save lockfile
···
101
86
}
102
87
None => {
103
88
// Fetch all dependencies from mlf.toml
104
104
-
fetch_all_dependencies(&project_root, update, locked)
89
89
+
fetch_all_dependencies(&project_root, update, locked).await
105
90
}
106
91
}
107
92
}
···
132
117
}
133
118
}
134
119
135
135
-
fn fetch_all_dependencies(project_root: &std::path::Path, update: bool, locked: bool) -> Result<(), FetchError> {
120
120
+
async fn fetch_all_dependencies(project_root: &std::path::Path, update: bool, locked: bool) -> Result<(), FetchError> {
136
121
// Load mlf.toml
137
122
let config_path = project_root.join("mlf.toml");
138
123
let config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?;
···
160
145
// In locked mode, we use the lockfile and verify nothing needs updating
161
146
// For now, we'll just use the lockfile - verification can be enhanced later
162
147
println!("Using locked dependencies from mlf-lock.toml");
163
163
-
return fetch_from_lockfile(project_root, &existing_lockfile);
148
148
+
return fetch_from_lockfile(project_root, &existing_lockfile).await;
164
149
}
165
150
166
151
// Determine fetch mode
···
192
177
// Fetch initial dependencies
193
178
for dep in &config.dependencies.dependencies {
194
179
println!("\nFetching: {}", dep);
195
195
-
match fetch_lexicon_with_lock(dep, project_root, &mut lockfile) {
180
180
+
match fetch_lexicon_with_lock(dep, project_root, &mut lockfile).await {
196
181
Ok(()) => {
197
182
success_count += 1;
198
183
fetched_nsids.insert(dep.clone());
···
205
190
206
191
// If transitive dependencies are enabled, fetch them
207
192
if allow_transitive {
208
208
-
fetch_transitive_dependencies(&project_root, &mut lockfile, config.dependencies.optimize_transitive_fetches)?;
193
193
+
fetch_transitive_dependencies(&project_root, &mut lockfile, config.dependencies.optimize_transitive_fetches).await?;
209
194
}
210
195
211
196
// Save the lockfile
···
232
217
}
233
218
234
219
/// Fetch transitive dependencies by iteratively resolving unresolved references
235
235
-
fn fetch_transitive_dependencies(
220
220
+
async fn fetch_transitive_dependencies(
236
221
project_root: &std::path::Path,
237
222
lockfile: &mut LockFile,
238
223
optimize_fetches: bool
···
287
272
println!("\nFetching transitive dependency: {}", pattern);
288
273
fetched_nsids.insert(pattern.clone());
289
274
290
290
-
match fetch_lexicon_with_lock(&pattern, project_root, lockfile) {
275
275
+
match fetch_lexicon_with_lock(&pattern, project_root, lockfile).await {
291
276
Ok(()) => {}
292
277
Err(e) => {
293
278
eprintln!(" Warning: Failed to fetch {}: {}", pattern, e);
···
320
305
println!(" Fetching: {}", nsid);
321
306
fetched_nsids.insert(nsid.clone());
322
307
323
323
-
match fetch_lexicon_with_lock(&nsid, project_root, lockfile) {
308
308
+
match fetch_lexicon_with_lock(&nsid, project_root, lockfile).await {
324
309
Ok(()) => {}
325
310
Err(e) => {
326
311
eprintln!(" Warning: Failed to fetch {}: {}", nsid, e);
···
339
324
println!("\nFetching transitive dependency: {}", nsid);
340
325
fetched_nsids.insert(nsid.clone());
341
326
342
342
-
match fetch_lexicon_with_lock(nsid, project_root, lockfile) {
327
327
+
match fetch_lexicon_with_lock(nsid, project_root, lockfile).await {
343
328
Ok(()) => {}
344
329
Err(e) => {
345
330
// Don't fail the entire fetch for transitive deps
···
355
340
356
341
/// Fetch dependencies using the lockfile
357
342
/// This refetches each lexicon from its recorded DID and verifies the checksum
358
358
-
fn fetch_from_lockfile(project_root: &std::path::Path, lockfile: &LockFile) -> Result<(), FetchError> {
343
343
+
async fn fetch_from_lockfile(project_root: &std::path::Path, lockfile: &LockFile) -> Result<(), FetchError> {
359
344
if lockfile.lexicons.is_empty() {
360
345
println!("Lockfile is empty");
361
346
return Ok(());
···
371
356
println!("\nRefetching: {}", nsid);
372
357
373
358
// Fetch the lexicon using the DID from lockfile
374
374
-
match fetch_specific_lexicon(nsid, &locked.did, &locked.checksum, project_root) {
359
359
+
match fetch_specific_lexicon(nsid, &locked.did, &locked.checksum, project_root).await {
375
360
Ok(()) => {
376
361
success_count += 1;
377
362
}
···
401
386
}
402
387
403
388
/// Fetch a specific lexicon by NSID from a known DID, verifying checksum
404
404
-
fn fetch_specific_lexicon(
389
389
+
async fn fetch_specific_lexicon(
405
390
nsid: &str,
406
391
did: &str,
407
392
expected_checksum: &str,
···
411
396
init_mlf_cache(project_root).map_err(FetchError::InitFailed)?;
412
397
let mlf_dir = get_mlf_cache_dir(project_root);
413
398
414
414
-
// Fetch records from the DID
415
415
-
let records = fetch_lexicon_records(did)?;
399
399
+
// Create fetcher and fetch from known DID (bypassing DNS)
400
400
+
let fetcher = ProductionLexiconFetcher::production()
401
401
+
.await
402
402
+
.map_err(|e| FetchError::HttpError(format!("Failed to create fetcher: {}", e)))?;
416
403
417
417
-
// Find the specific NSID
418
418
-
for record in records {
419
419
-
let record_nsid = extract_nsid_from_record(&record)?;
404
404
+
let result = fetcher
405
405
+
.fetch_from_did_with_metadata(did, nsid)
406
406
+
.await
407
407
+
.map_err(|e| FetchError::HttpError(format!("Failed to fetch from DID: {}", e)))?;
420
408
421
421
-
if record_nsid == nsid {
422
422
-
// Found it! Process and verify checksum
423
423
-
let json_str = serde_json::to_string_pretty(&record.value)?;
424
424
-
let hash = calculate_hash(&json_str);
409
409
+
if result.lexicons.is_empty() {
410
410
+
return Err(FetchError::HttpError(format!(
411
411
+
"Lexicon {} not found in repo {}",
412
412
+
nsid, did
413
413
+
)));
414
414
+
}
425
415
426
426
-
if hash != expected_checksum {
427
427
-
return Err(FetchError::HttpError(format!(
428
428
-
"Checksum mismatch for {}: expected {}, got {}",
429
429
-
nsid, expected_checksum, hash
430
430
-
)));
431
431
-
}
416
416
+
// We should only get one lexicon for an exact NSID match
417
417
+
let fetched = &result.lexicons[0];
432
418
433
433
-
// Save JSON
434
434
-
let mut json_path = mlf_dir.join("lexicons/json");
435
435
-
for segment in nsid.split('.') {
436
436
-
json_path.push(segment);
437
437
-
}
438
438
-
json_path.set_extension("json");
419
419
+
if fetched.nsid != nsid {
420
420
+
return Err(FetchError::HttpError(format!(
421
421
+
"Expected lexicon {}, but got {}",
422
422
+
nsid, fetched.nsid
423
423
+
)));
424
424
+
}
425
425
+
426
426
+
// Verify checksum
427
427
+
let json_str = serde_json::to_string_pretty(&fetched.lexicon)?;
428
428
+
let hash = calculate_hash(&json_str);
439
429
440
440
-
if let Some(parent) = json_path.parent() {
441
441
-
std::fs::create_dir_all(parent)?;
442
442
-
}
443
443
-
std::fs::write(&json_path, &json_str)?;
444
444
-
println!(" → Saved JSON (checksum verified)");
430
430
+
if hash != expected_checksum {
431
431
+
return Err(FetchError::HttpError(format!(
432
432
+
"Checksum mismatch for {}: expected {}, got {}",
433
433
+
nsid, expected_checksum, hash
434
434
+
)));
435
435
+
}
445
436
446
446
-
// Convert to MLF
447
447
-
let mlf_content = crate::generate::mlf::generate_mlf_from_json(&record.value)
448
448
-
.map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?;
437
437
+
// Save JSON
438
438
+
let mut json_path = mlf_dir.join("lexicons/json");
439
439
+
for segment in nsid.split('.') {
440
440
+
json_path.push(segment);
441
441
+
}
442
442
+
json_path.set_extension("json");
449
443
450
450
-
let mut mlf_path = mlf_dir.join("lexicons/mlf");
451
451
-
for segment in nsid.split('.') {
452
452
-
mlf_path.push(segment);
453
453
-
}
454
454
-
mlf_path.set_extension("mlf");
444
444
+
if let Some(parent) = json_path.parent() {
445
445
+
std::fs::create_dir_all(parent)?;
446
446
+
}
447
447
+
std::fs::write(&json_path, &json_str)?;
448
448
+
println!(" → Saved JSON (checksum verified)");
455
449
456
456
-
if let Some(parent) = mlf_path.parent() {
457
457
-
std::fs::create_dir_all(parent)?;
458
458
-
}
459
459
-
std::fs::write(&mlf_path, mlf_content)?;
460
460
-
println!(" → Converted to MLF");
450
450
+
// Convert to MLF
451
451
+
let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon)
452
452
+
.map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?;
461
453
462
462
-
return Ok(());
463
463
-
}
454
454
+
let mut mlf_path = mlf_dir.join("lexicons/mlf");
455
455
+
for segment in nsid.split('.') {
456
456
+
mlf_path.push(segment);
464
457
}
458
458
+
mlf_path.set_extension("mlf");
465
459
466
466
-
Err(FetchError::HttpError(format!(
467
467
-
"Lexicon {} not found in repo {}",
468
468
-
nsid, did
469
469
-
)))
460
460
+
if let Some(parent) = mlf_path.parent() {
461
461
+
std::fs::create_dir_all(parent)?;
462
462
+
}
463
463
+
std::fs::write(&mlf_path, mlf_content)?;
464
464
+
println!(" → Converted to MLF");
465
465
+
466
466
+
Ok(())
470
467
}
471
468
472
469
fn save_dependency(project_root: &std::path::Path, nsid: &str) -> Result<(), FetchError> {
···
485
482
Ok(())
486
483
}
487
484
488
488
-
pub fn fetch_lexicon(nsid: &str, project_root: &std::path::Path) -> Result<(), FetchError> {
489
489
-
let mut lockfile = LockFile::new();
490
490
-
fetch_lexicon_with_lock(nsid, project_root, &mut lockfile)
491
491
-
}
492
492
-
493
493
-
fn fetch_lexicon_with_lock(nsid: &str, project_root: &std::path::Path, lockfile: &mut LockFile) -> Result<(), FetchError> {
485
485
+
async fn fetch_lexicon_with_lock(nsid: &str, project_root: &std::path::Path, lockfile: &mut LockFile) -> Result<(), FetchError> {
494
486
// Initialize .mlf directory
495
487
init_mlf_cache(project_root).map_err(FetchError::InitFailed)?;
496
496
-
497
488
let mlf_dir = get_mlf_cache_dir(&project_root);
498
489
499
499
-
// Validate NSID format: must be specific (3+ segments) or use wildcard
490
490
+
// Validate NSID format
500
491
validate_nsid_format(nsid)?;
501
492
502
502
-
// Check if it's a wildcard pattern
503
503
-
let is_wildcard = nsid.ends_with(".*");
504
504
-
let nsid_pattern = if is_wildcard {
505
505
-
nsid.strip_suffix(".*").unwrap()
506
506
-
} else {
507
507
-
nsid
508
508
-
};
509
509
-
510
510
-
// Extract authority and name segments from NSID
511
511
-
// For "app.bsky.actor.profile", authority is "app.bsky", name is "actor.profile"
512
512
-
// For DNS lookup, we need "_lexicon.actor.bsky.app"
513
513
-
let (authority, name_segments) = extract_authority_and_name(nsid_pattern)?;
514
493
println!("Fetching lexicons for pattern: {}", nsid);
515
494
516
516
-
// Step 1: DNS TXT lookup
517
517
-
let did = resolve_lexicon_did(&authority, &name_segments)?;
518
518
-
println!(" → Resolved DID: {}", did);
495
495
+
// Create the lexicon fetcher (encapsulates all DNS and HTTP logic)
496
496
+
let fetcher = ProductionLexiconFetcher::production()
497
497
+
.await
498
498
+
.map_err(|e| FetchError::HttpError(format!("Failed to create fetcher: {}", e)))?;
519
499
520
520
-
// Step 2: Query ATProto repo for lexicon schemas
521
521
-
let records = fetch_lexicon_records(&did)?;
522
522
-
println!(" → Found {} lexicon record(s)", records.len());
500
500
+
// Fetch lexicons with metadata
501
501
+
let result = fetcher
502
502
+
.fetch_with_metadata(nsid)
503
503
+
.await
504
504
+
.map_err(|e| FetchError::HttpError(format!("Failed to fetch: {}", e)))?;
523
505
524
524
-
if records.is_empty() {
506
506
+
if result.lexicons.is_empty() {
525
507
return Err(FetchError::HttpError(format!(
526
526
-
"No lexicon records found for {}",
508
508
+
"No lexicons matched pattern: {}",
527
509
nsid
528
510
)));
529
511
}
530
512
531
531
-
let mut processed_count = 0;
532
532
-
533
533
-
// Step 3: Process each record
534
534
-
for record in records {
535
535
-
// Extract NSID from record URI or value
536
536
-
let record_nsid = extract_nsid_from_record(&record)?;
537
537
-
538
538
-
// Match against pattern
539
539
-
let matches = if is_wildcard {
540
540
-
// Wildcard: match all records starting with the pattern
541
541
-
// For "app.bsky.actor.*", nsid_pattern is "app.bsky.actor"
542
542
-
// Should match "app.bsky.actor.defs", "app.bsky.actor.profile", etc.
543
543
-
let starts_with_pattern = record_nsid.starts_with(nsid_pattern);
544
544
-
let has_more_segments = record_nsid.len() > nsid_pattern.len();
545
545
-
let is_direct_child = if starts_with_pattern && has_more_segments {
546
546
-
// Check if the next character after the pattern is a dot
547
547
-
record_nsid.chars().nth(nsid_pattern.len()) == Some('.')
548
548
-
} else {
549
549
-
false
550
550
-
};
551
551
-
552
552
-
starts_with_pattern && has_more_segments && is_direct_child
553
553
-
} else {
554
554
-
// Specific: exact match only
555
555
-
record_nsid == nsid
556
556
-
};
513
513
+
println!(" → Found {} lexicon record(s)", result.lexicons.len());
557
514
558
558
-
if !matches {
559
559
-
continue;
560
560
-
}
515
515
+
// Process each fetched lexicon
516
516
+
for fetched in &result.lexicons {
517
517
+
println!(" Processing: {}", fetched.nsid);
561
518
562
562
-
println!(" Processing: {}", record_nsid);
563
563
-
processed_count += 1;
564
564
-
565
565
-
// Save JSON file with directory structure
566
566
-
// e.g., "place.stream.key" -> "place/stream/key.json"
567
567
-
let json_str = serde_json::to_string_pretty(&record.value)?;
519
519
+
// Save JSON file
520
520
+
let json_str = serde_json::to_string_pretty(&fetched.lexicon)?;
568
521
let mut json_path = mlf_dir.join("lexicons/json");
569
569
-
for segment in record_nsid.split('.') {
522
522
+
for segment in fetched.nsid.split('.') {
570
523
json_path.push(segment);
571
524
}
572
525
json_path.set_extension("json");
573
526
574
574
-
// Create parent directories
575
527
if let Some(parent) = json_path.parent() {
576
528
std::fs::create_dir_all(parent)?;
577
529
}
578
578
-
579
530
std::fs::write(&json_path, &json_str)?;
580
531
println!(" → Saved JSON to {}", json_path.display());
581
532
582
533
// Convert to MLF
583
583
-
let mlf_content = crate::generate::mlf::generate_mlf_from_json(&record.value)
534
534
+
let mlf_content = crate::generate::mlf::generate_mlf_from_json(&fetched.lexicon)
584
535
.map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?;
585
536
586
586
-
// Save MLF file with directory structure
587
587
-
// e.g., "place.stream.key" -> "place/stream/key.mlf"
537
537
+
// Save MLF file
588
538
let mut mlf_path = mlf_dir.join("lexicons/mlf");
589
589
-
for segment in record_nsid.split('.') {
539
539
+
for segment in fetched.nsid.split('.') {
590
540
mlf_path.push(segment);
591
541
}
592
542
mlf_path.set_extension("mlf");
593
543
594
594
-
// Create parent directories
595
544
if let Some(parent) = mlf_path.parent() {
596
545
std::fs::create_dir_all(parent)?;
597
546
}
598
598
-
599
547
std::fs::write(&mlf_path, mlf_content)?;
600
548
println!(" → Converted to MLF at {}", mlf_path.display());
601
549
602
602
-
// Calculate hash of JSON content
550
550
+
// Calculate hash and extract dependencies for lockfile
603
551
let hash = calculate_hash(&json_str);
552
552
+
let dependencies = extract_dependencies_from_json(&fetched.lexicon);
604
553
605
605
-
// Extract dependencies from JSON
606
606
-
let dependencies = extract_dependencies_from_json(&record.value);
607
607
-
608
608
-
// Update lockfile
609
609
-
lockfile.add_lexicon(record_nsid.clone(), did.clone(), hash.clone(), dependencies);
610
610
-
}
611
611
-
612
612
-
if processed_count == 0 {
613
613
-
return Err(FetchError::HttpError(format!(
614
614
-
"No lexicons matched pattern: {}",
615
615
-
nsid
616
616
-
)));
554
554
+
// Update lockfile with DID from fetcher metadata
555
555
+
lockfile.add_lexicon(fetched.nsid.clone(), fetched.did.clone(), hash, dependencies);
617
556
}
618
557
619
619
-
println!("✓ Successfully fetched {} lexicon(s) for {}", processed_count, nsid);
558
558
+
println!("✓ Successfully fetched {} lexicon(s) for {}", result.lexicons.len(), nsid);
620
559
Ok(())
621
560
}
622
561
623
562
fn validate_nsid_format(nsid: &str) -> Result<(), FetchError> {
624
624
-
// Remove wildcard suffix for validation
625
625
-
let nsid_base = nsid.strip_suffix(".*").unwrap_or(nsid);
563
563
+
// Remove wildcard suffix for validation (both .* and ._)
564
564
+
let nsid_base = nsid
565
565
+
.strip_suffix(".*")
566
566
+
.or_else(|| nsid.strip_suffix("._"))
567
567
+
.unwrap_or(nsid);
626
568
627
569
let parts: Vec<&str> = nsid_base.split('.').collect();
628
570
629
571
// NSID must have at least 2 segments (authority)
630
630
-
// e.g., "place.stream", "place.stream.key", or "place.stream.*"
572
572
+
// e.g., "place.stream", "place.stream.key", "place.stream.*", or "place.stream._"
631
573
if parts.len() < 2 {
632
574
return Err(FetchError::InvalidNsid(format!(
633
575
"NSID must have at least 2 segments (e.g., 'place.stream' or 'com.atproto.repo.strongRef'): {}",
···
638
580
Ok(())
639
581
}
640
582
641
641
-
fn extract_authority_and_name(nsid_pattern: &str) -> Result<(String, String), FetchError> {
642
642
-
// NSID format: authority.name(.name)*
643
643
-
// For "place.stream.key", authority is "place.stream" (first 2), name is "key"
644
644
-
// For "app.bsky.actor.profile", authority is "app.bsky" (first 2), name is "actor.profile"
645
645
-
let parts: Vec<&str> = nsid_pattern.split('.').collect();
646
646
-
647
647
-
if parts.len() < 2 {
648
648
-
return Err(FetchError::InvalidNsid(format!(
649
649
-
"NSID must have at least 2 segments: {}",
650
650
-
nsid_pattern
651
651
-
)));
652
652
-
}
653
653
-
654
654
-
// Authority is first 2 segments (reversed domain)
655
655
-
let authority = format!("{}.{}", parts[0], parts[1]);
656
656
-
657
657
-
// Name segments are everything after the authority
658
658
-
let name_segments = if parts.len() > 2 {
659
659
-
parts[2..].join(".")
660
660
-
} else {
661
661
-
String::new()
662
662
-
};
663
663
-
664
664
-
Ok((authority, name_segments))
665
665
-
}
666
666
-
667
667
-
fn resolve_lexicon_did(authority: &str, name_segments: &str) -> Result<String, FetchError> {
668
668
-
// Reverse the authority for DNS lookup and prepend name segments
669
669
-
// For "app.bsky" + "actor": "_lexicon.actor.bsky.app"
670
670
-
// For "place.stream" + "key": "_lexicon.key.stream.place"
671
671
-
let auth_parts: Vec<&str> = authority.split('.').collect();
672
672
-
let reversed_auth: Vec<&str> = auth_parts.iter().rev().copied().collect();
673
673
-
674
674
-
let dns_name = if name_segments.is_empty() {
675
675
-
// No name segments, just use reversed authority
676
676
-
// For "place.stream": "_lexicon.stream.place"
677
677
-
format!("_lexicon.{}", reversed_auth.join("."))
678
678
-
} else {
679
679
-
// Prepend name segments before reversed authority
680
680
-
// For "app.bsky" + "actor": "_lexicon.actor.bsky.app"
681
681
-
format!("_lexicon.{}.{}", name_segments, reversed_auth.join("."))
682
682
-
};
683
683
-
684
684
-
println!(" Looking up DNS TXT record: {}", dns_name);
685
685
-
686
686
-
// Create DNS resolver
687
687
-
let resolver = Resolver::new(ResolverConfig::default(), ResolverOpts::default())
688
688
-
.map_err(|e| FetchError::DnsError(format!("Failed to create DNS resolver: {}", e)))?;
689
689
-
690
690
-
// Lookup TXT records
691
691
-
let response = resolver
692
692
-
.txt_lookup(&dns_name)
693
693
-
.map_err(|e| FetchError::DnsError(format!("DNS TXT lookup failed for {}: {}", dns_name, e)))?;
694
694
-
695
695
-
// Parse TXT records to find DID
696
696
-
for txt_record in response.iter() {
697
697
-
for txt_data in txt_record.txt_data() {
698
698
-
let text = String::from_utf8_lossy(txt_data);
699
699
-
// Look for "did=did:plc:..." or "did=did:web:..."
700
700
-
if let Some(did_value) = text.strip_prefix("did=") {
701
701
-
return Ok(did_value.trim().to_string());
702
702
-
}
703
703
-
}
704
704
-
}
705
705
-
706
706
-
Err(FetchError::DidParseError(format!(
707
707
-
"No DID found in TXT record for {}",
708
708
-
dns_name
709
709
-
)))
710
710
-
}
711
711
-
712
712
-
fn fetch_lexicon_records(did: &str) -> Result<Vec<AtProtoRecord>, FetchError> {
713
713
-
// Query the ATProto repo for records in com.atproto.lexicon.schema collection
714
714
-
// We need to use the repo.listRecords XRPC endpoint
715
715
-
// Note: This API is paginated, so we need to fetch all pages
716
716
-
717
717
-
// First, resolve the DID to a PDS endpoint
718
718
-
let pds_url = resolve_did_to_pds(did)?;
719
719
-
720
720
-
println!(" → Using PDS: {}", pds_url);
721
721
-
722
722
-
let mut all_records = Vec::new();
723
723
-
let mut cursor: Option<String> = None;
724
724
-
let mut page_num = 1;
725
725
-
726
726
-
loop {
727
727
-
// Build URL with optional cursor
728
728
-
let url = if let Some(ref c) = cursor {
729
729
-
format!(
730
730
-
"{}/xrpc/com.atproto.repo.listRecords?repo={}&collection=com.atproto.lexicon.schema&cursor={}",
731
731
-
pds_url, did, c
732
732
-
)
733
733
-
} else {
734
734
-
format!(
735
735
-
"{}/xrpc/com.atproto.repo.listRecords?repo={}&collection=com.atproto.lexicon.schema",
736
736
-
pds_url, did
737
737
-
)
738
738
-
};
739
739
-
740
740
-
println!(" Fetching lexicon records (page {})...", page_num);
741
741
-
742
742
-
let response = reqwest::blocking::get(&url)
743
743
-
.map_err(|e| FetchError::HttpError(format!("Failed to fetch records: {}", e)))?;
744
744
-
745
745
-
if !response.status().is_success() {
746
746
-
return Err(FetchError::HttpError(format!(
747
747
-
"HTTP {} when fetching records",
748
748
-
response.status()
749
749
-
)));
750
750
-
}
751
751
-
752
752
-
let mut list_response: serde_json::Value = response
753
753
-
.json()
754
754
-
.map_err(|e| FetchError::HttpError(format!("Failed to parse response: {}", e)))?;
755
755
-
756
756
-
// Extract records
757
757
-
if let Some(records_array) = list_response.get_mut("records") {
758
758
-
if let Some(records) = records_array.as_array_mut() {
759
759
-
for record_value in records.drain(..) {
760
760
-
let record: AtProtoRecord = serde_json::from_value(record_value)
761
761
-
.map_err(|e| FetchError::HttpError(format!("Failed to parse record: {}", e)))?;
762
762
-
all_records.push(record);
763
763
-
}
764
764
-
}
765
765
-
}
766
766
-
767
767
-
// Check for cursor to continue pagination
768
768
-
cursor = list_response.get("cursor")
769
769
-
.and_then(|c| c.as_str())
770
770
-
.map(|s| s.to_string());
771
771
-
772
772
-
if cursor.is_none() {
773
773
-
break;
774
774
-
}
775
775
-
776
776
-
page_num += 1;
777
777
-
}
778
778
-
779
779
-
Ok(all_records)
780
780
-
}
781
781
-
782
782
-
fn resolve_did_to_pds(did: &str) -> Result<String, FetchError> {
783
783
-
// For did:web:, extract the domain
784
784
-
if let Some(domain) = did.strip_prefix("did:web:") {
785
785
-
return Ok(format!("https://{}", domain));
786
786
-
}
787
787
-
788
788
-
// For did:plc:, we need to query the PLC directory
789
789
-
if did.starts_with("did:plc:") {
790
790
-
// Query plc.directory to resolve DID document
791
791
-
let url = format!("https://plc.directory/{}", did);
792
792
-
793
793
-
let response = reqwest::blocking::get(&url)
794
794
-
.map_err(|e| FetchError::HttpError(format!("Failed to resolve DID: {}", e)))?;
795
795
-
796
796
-
if !response.status().is_success() {
797
797
-
return Err(FetchError::HttpError(format!(
798
798
-
"Failed to resolve DID {}: HTTP {}",
799
799
-
did,
800
800
-
response.status()
801
801
-
)));
802
802
-
}
803
803
-
804
804
-
let did_doc: serde_json::Value = response
805
805
-
.json()
806
806
-
.map_err(|e| FetchError::HttpError(format!("Failed to parse DID document: {}", e)))?;
807
807
-
808
808
-
// Extract PDS endpoint from service array
809
809
-
if let Some(services) = did_doc.get("service").and_then(|v| v.as_array()) {
810
810
-
for service in services {
811
811
-
if service.get("type").and_then(|v| v.as_str()) == Some("AtprotoPersonalDataServer") {
812
812
-
if let Some(endpoint) = service.get("serviceEndpoint").and_then(|v| v.as_str()) {
813
813
-
return Ok(endpoint.trim_end_matches('/').to_string());
814
814
-
}
815
815
-
}
816
816
-
}
817
817
-
}
818
818
-
819
819
-
return Err(FetchError::HttpError(format!(
820
820
-
"No PDS endpoint found in DID document for {}",
821
821
-
did
822
822
-
)));
823
823
-
}
824
824
-
825
825
-
Err(FetchError::HttpError(format!(
826
826
-
"Unsupported DID method: {}",
827
827
-
did
828
828
-
)))
829
829
-
}
830
830
-
831
831
-
fn extract_nsid_from_record(record: &AtProtoRecord) -> Result<String, FetchError> {
832
832
-
// The record value should have an "id" field with the NSID
833
833
-
if let Some(id) = record.value.get("id").and_then(|v| v.as_str()) {
834
834
-
return Ok(id.to_string());
835
835
-
}
836
836
-
837
837
-
// Fallback: try to extract from URI
838
838
-
// URI format: at://did:plc:xxx/com.atproto.lexicon.schema/nsid
839
839
-
if let Some(rkey) = record.uri.split('/').last() {
840
840
-
return Ok(rkey.to_string());
841
841
-
}
842
842
-
843
843
-
Err(FetchError::HttpError(format!(
844
844
-
"Could not extract NSID from record: {}",
845
845
-
record.uri
846
846
-
)))
847
847
-
}
848
583
849
584
/// Calculate SHA-256 hash of content
850
585
fn calculate_hash(content: &str) -> String {
···
994
729
}
995
730
}
996
731
997
997
-
/// Collapse a set of NSIDs into a minimal set of fetch patterns.
/// For example: ["app.bsky.actor.foo", "app.bsky.actor.bar"] -> ["app.bsky.actor.*"]
///
/// Strategy:
/// 1. Group by namespace prefix (all but the last segment); any group of
///    2+ NSIDs is replaced by a `prefix.*` wildcard.
/// 2. For NSIDs still unhandled, group by authority (first 2 segments);
///    an authority wildcard is used only when 3+ NSIDs remain under it,
///    so we don't over-fetch for small groups.
/// 3. Anything left over is fetched individually.
///
/// Returns a sorted, deterministic list of patterns/NSIDs.
fn optimize_fetch_patterns(nsids: &HashSet<String>) -> Vec<String> {
    use std::collections::BTreeMap;

    if nsids.is_empty() {
        return Vec::new();
    }

    // BTreeMaps keep iteration order deterministic across runs.
    let mut authority_groups: BTreeMap<String, Vec<String>> = BTreeMap::new();
    let mut prefix_groups: BTreeMap<String, Vec<String>> = BTreeMap::new();

    // Single scan fills both groupings.
    for nsid in nsids {
        let parts: Vec<&str> = nsid.split('.').collect();

        // Authority = first 2 segments, e.g. "app.bsky".
        if parts.len() >= 2 {
            let authority = format!("{}.{}", parts[0], parts[1]);
            authority_groups.entry(authority).or_default().push(nsid.clone());
        }

        // Namespace prefix = all but the last segment, e.g. "app.bsky.actor".
        if parts.len() >= 3 {
            let prefix = parts[..parts.len() - 1].join(".");
            prefix_groups.entry(prefix).or_default().push(nsid.clone());
        }
    }

    let mut result = Vec::new();
    let mut handled_nsids: HashSet<String> = HashSet::new();

    // First pass: namespace-level wildcards (most specific grouping).
    for (prefix, group) in &prefix_groups {
        if group.len() >= 2 && !handled_nsids.contains(&group[0]) {
            result.push(format!("{}.*", prefix));
            handled_nsids.extend(group.iter().cloned());
        }
    }

    // Second pass: authority-level wildcards for 3+ remaining NSIDs
    // under the same authority.
    for (authority, group) in &authority_groups {
        let unhandled: Vec<&String> = group
            .iter()
            .filter(|nsid| !handled_nsids.contains(*nsid))
            .collect();

        if unhandled.len() >= 3 {
            result.push(format!("{}.*", authority));
            handled_nsids.extend(unhandled.into_iter().cloned());
        }
    }

    // Third pass: remaining NSIDs are fetched one by one.
    for nsid in nsids {
        if !handled_nsids.contains(nsid) {
            result.push(nsid.clone());
        }
    }

    // Sort for consistent output.
    result.sort();
    result
}
+3
-2
mlf-cli/src/main.rs
···
112
112
},
113
113
}
114
114
115
115
-
fn main() {
115
115
+
#[tokio::main]
116
116
+
async fn main() {
116
117
let cli = Cli::parse();
117
118
118
119
let result: Result<(), miette::Report> = match cli.command {
···
141
142
}
142
143
},
143
144
Commands::Fetch { nsid, save, update, locked } => {
144
144
-
fetch::run_fetch(nsid, save, update, locked).into_diagnostic()
145
145
+
fetch::run_fetch(nsid, save, update, locked).await.into_diagnostic()
145
146
}
146
147
};
147
148
+4
mlf-lang/tests/integration_test.rs
···
11
11
#[serde(default)]
12
12
errors: Vec<ExpectedError>,
13
13
#[serde(default)]
14
14
+
#[allow(dead_code)]
14
15
warnings: Vec<String>,
15
16
#[serde(flatten)]
17
17
+
#[allow(dead_code)]
16
18
extra: HashMap<String, serde_json::Value>,
17
19
}
18
20
···
21
23
#[serde(rename = "type")]
22
24
error_type: String,
23
25
#[serde(default)]
26
26
+
#[allow(dead_code)]
24
27
name: Option<String>,
25
28
#[serde(default)]
29
29
+
#[allow(dead_code)]
26
30
message: Option<String>,
27
31
}
28
32
+17
mlf-lexicon-fetcher/Cargo.toml
···
1
1
+
[package]
2
2
+
name = "mlf-lexicon-fetcher"
3
3
+
version = "0.1.0"
4
4
+
edition = "2024"
5
5
+
license = "MIT"
6
6
+
7
7
+
[dependencies]
8
8
+
hickory-resolver = "0.24"
9
9
+
thiserror = "2.0"
10
10
+
serde = { version = "1.0", features = ["derive"] }
11
11
+
serde_json = "1.0"
12
12
+
reqwest = { version = "0.12", features = ["json"] }
13
13
+
async-trait = "0.1"
14
14
+
tokio = { version = "1", features = ["rt"] }
15
15
+
16
16
+
[dev-dependencies]
17
17
+
tokio = { version = "1", features = ["full"] }
+72
mlf-lexicon-fetcher/examples/usage.rs
···
1
1
+
// Example usage of mlf-lexicon-fetcher
//
// Demonstrates the mock resolver/client pair; no network access is needed,
// so this example runs offline.

use mlf_lexicon_fetcher::{LexiconFetcher, MockDnsResolver, MockHttpClient};
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Example 1: Fetch a single lexicon
    println!("=== Example 1: Fetch Single Lexicon ===");

    // Map the NSID's DNS name (authority + name segments) to a DID.
    let mut dns_resolver = MockDnsResolver::new();
    dns_resolver.add_record("place.stream", "chat.profile", "did:plc:test123".to_string());

    // Serve a canned lexicon document for that NSID.
    let mut http_client = MockHttpClient::new();
    http_client.add_lexicon(
        "place.stream.chat.profile".to_string(),
        json!({
            "lexicon": 1,
            "id": "place.stream.chat.profile",
            "defs": {
                "main": {
                    "type": "record",
                    "description": "A user profile for chat"
                }
            }
        }),
    );

    let fetcher = LexiconFetcher::new(dns_resolver, http_client);

    match fetcher.fetch("place.stream.chat.profile").await {
        Ok(lexicon) => {
            println!("Successfully fetched lexicon:");
            println!("{}", serde_json::to_string_pretty(&lexicon)?);
        }
        Err(e) => eprintln!("Error: {}", e),
    }

    // Example 2: Fetch multiple lexicons with a pattern
    println!("\n=== Example 2: Fetch Multiple Lexicons with Pattern ===");

    // One DNS record covers the whole "app.bsky" + "feed" namespace.
    let mut dns_resolver2 = MockDnsResolver::new();
    dns_resolver2.add_record("app.bsky", "feed", "did:plc:bsky123".to_string());

    let mut http_client2 = MockHttpClient::new();
    http_client2.add_lexicon(
        "app.bsky.feed.post".to_string(),
        json!({"lexicon": 1, "id": "app.bsky.feed.post"}),
    );
    http_client2.add_lexicon(
        "app.bsky.feed.like".to_string(),
        json!({"lexicon": 1, "id": "app.bsky.feed.like"}),
    );
    http_client2.add_lexicon(
        "app.bsky.feed.repost".to_string(),
        json!({"lexicon": 1, "id": "app.bsky.feed.repost"}),
    );

    let fetcher2 = LexiconFetcher::new(dns_resolver2, http_client2);

    // ".*" matches every NSID under the base at any depth.
    match fetcher2.fetch_pattern("app.bsky.feed.*").await {
        Ok(lexicons) => {
            println!("Successfully fetched {} lexicons:", lexicons.len());
            for (nsid, _lexicon) in lexicons {
                println!("  - {}", nsid);
            }
        }
        Err(e) => eprintln!("Error: {}", e),
    }

    Ok(())
}
+880
mlf-lexicon-fetcher/src/lib.rs
···
1
1
+
// MLF Lexicon Fetcher
2
2
+
// Resolves ATProto lexicon NSIDs to DIDs via DNS TXT records
3
3
+
// and fetches lexicon JSON via HTTP
4
4
+
5
5
+
use async_trait::async_trait;
6
6
+
use hickory_resolver::config::{ResolverConfig, ResolverOpts};
7
7
+
use hickory_resolver::TokioAsyncResolver;
8
8
+
use serde::Deserialize;
9
9
+
use std::collections::HashMap;
10
10
+
use std::sync::{Arc, Mutex};
11
11
+
use thiserror::Error;
12
12
+
13
13
+
/// One entry from a `com.atproto.repo.listRecords` response.
#[derive(Debug, Deserialize)]
struct AtProtoRecord {
    // AT-URI of the record, e.g. at://<did>/<collection>/<rkey>.
    uri: String,
    // The record body; for lexicon records this is the lexicon JSON itself.
    value: serde_json::Value,
}
18
18
+
19
19
+
/// Errors produced while resolving NSIDs and fetching lexicon documents.
#[derive(Error, Debug)]
pub enum LexiconFetcherError {
    // Reserved for resolver construction failures (constructors are
    // currently infallible but keep a Result signature).
    #[error("Failed to create DNS resolver: {0}")]
    ResolverCreationFailed(String),

    // DNS TXT lookup failed or (for mocks) no record was registered.
    #[error("DNS lookup failed for {domain}: {error}")]
    LookupFailed { domain: String, error: String },

    // TXT records existed but none carried a "did=" entry.
    #[error("No DID found in TXT record for {0}")]
    NoDid(String),

    // NSID had fewer than 2 segments, or a wildcard was passed to fetch().
    #[error("Invalid NSID format: {0}")]
    InvalidNsid(String),

    #[error("HTTP request failed: {0}")]
    HttpRequestFailed(String),

    #[error("Failed to parse JSON response: {0}")]
    JsonParseFailed(String),

    #[error("Lexicon not found: {0}")]
    LexiconNotFound(String),

    // DID method other than did:web / did:plc.
    #[error("Invalid URL: {0}")]
    InvalidUrl(String),
}

/// Crate-wide result alias over [`LexiconFetcherError`].
pub type Result<T> = std::result::Result<T, LexiconFetcherError>;
47
47
+
48
48
+
/// Trait for DNS resolution - allows mocking in tests.
#[async_trait]
pub trait DnsResolver: Send + Sync {
    /// Resolve an NSID (already split into authority + name segments)
    /// to a DID via a `_lexicon.` DNS TXT lookup.
    ///
    /// `name_segments` may be empty; see [`construct_dns_name`] for how
    /// the lookup name is built.
    async fn resolve_lexicon_did(&self, authority: &str, name_segments: &str) -> Result<String>;
}
54
54
+
55
55
+
/// Real DNS resolver using hickory_resolver's async resolver.
pub struct RealDnsResolver {
    resolver: TokioAsyncResolver,
}

impl RealDnsResolver {
    /// Create a resolver with hickory's default configuration.
    ///
    /// Currently infallible; returns `Result` so the signature stays
    /// stable if construction later gains failure modes.
    pub async fn new() -> Result<Self> {
        let resolver = TokioAsyncResolver::tokio(ResolverConfig::default(), ResolverOpts::default());
        Ok(Self { resolver })
    }

    /// Create a resolver with explicit hickory configuration and options.
    pub async fn with_config(config: ResolverConfig, opts: ResolverOpts) -> Result<Self> {
        let resolver = TokioAsyncResolver::tokio(config, opts);
        Ok(Self { resolver })
    }
}
71
71
+
72
72
+
#[async_trait]
73
73
+
impl DnsResolver for RealDnsResolver {
74
74
+
async fn resolve_lexicon_did(&self, authority: &str, name_segments: &str) -> Result<String> {
75
75
+
let dns_name = construct_dns_name(authority, name_segments);
76
76
+
77
77
+
// Lookup TXT records (async)
78
78
+
let response = self
79
79
+
.resolver
80
80
+
.txt_lookup(&dns_name)
81
81
+
.await
82
82
+
.map_err(|e| LexiconFetcherError::LookupFailed {
83
83
+
domain: dns_name.clone(),
84
84
+
error: e.to_string(),
85
85
+
})?;
86
86
+
87
87
+
// Parse TXT records to find DID
88
88
+
for txt_record in response.iter() {
89
89
+
for txt_data in txt_record.txt_data() {
90
90
+
let text = String::from_utf8_lossy(txt_data);
91
91
+
// Look for "did=did:plc:..." or "did=did:web:..."
92
92
+
if let Some(did_value) = text.strip_prefix("did=") {
93
93
+
return Ok(did_value.trim().to_string());
94
94
+
}
95
95
+
}
96
96
+
}
97
97
+
98
98
+
Err(LexiconFetcherError::NoDid(dns_name))
99
99
+
}
100
100
+
}
101
101
+
102
102
+
/// Mock DNS resolver for testing.
///
/// Clones share the same record table via Arc.
#[derive(Clone)]
pub struct MockDnsResolver {
    // Keyed by the fully constructed DNS name (see construct_dns_name).
    records: Arc<Mutex<HashMap<String, String>>>,
}

impl MockDnsResolver {
    pub fn new() -> Self {
        Self {
            records: Arc::new(Mutex::new(HashMap::new())),
        }
    }

    /// Add a mock DNS record (maps NSID authority+name to DID).
    pub fn add_record(&mut self, authority: &str, name_segments: &str, did: String) {
        let dns_name = construct_dns_name(authority, name_segments);
        self.records.lock().unwrap().insert(dns_name, did);
    }

    /// Add a mock record using a full NSID (parsed into authority + name).
    pub fn add_record_from_nsid(&mut self, nsid: &str, did: String) -> Result<()> {
        let (authority, name_segments) = parse_nsid(nsid)?;
        self.add_record(&authority, &name_segments, did);
        Ok(())
    }
}

impl Default for MockDnsResolver {
    fn default() -> Self {
        Self::new()
    }
}

#[async_trait]
impl DnsResolver for MockDnsResolver {
    // Same lookup-key construction as the real resolver, so mocks and
    // production code agree on which record an NSID maps to.
    async fn resolve_lexicon_did(&self, authority: &str, name_segments: &str) -> Result<String> {
        let dns_name = construct_dns_name(authority, name_segments);
        self.records
            .lock()
            .unwrap()
            .get(&dns_name)
            .cloned()
            .ok_or_else(|| LexiconFetcherError::LookupFailed {
                domain: dns_name.clone(),
                error: "No mock record found".to_string(),
            })
    }
}
150
150
+
151
151
+
/// Construct the `_lexicon.` DNS lookup name from an NSID's authority and
/// name segments.
///
/// The authority's labels are reversed into domain order and any name
/// segments are placed in front:
/// - ("app.bsky", "actor")  -> "_lexicon.actor.bsky.app"
/// - ("place.stream", "")   -> "_lexicon.stream.place"
pub fn construct_dns_name(authority: &str, name_segments: &str) -> String {
    // rsplit walks the authority's labels back-to-front, which is exactly
    // the reversal needed for domain order.
    let reversed_auth = authority.rsplit('.').collect::<Vec<_>>().join(".");

    if name_segments.is_empty() {
        // No name segments: just the reversed authority.
        format!("_lexicon.{}", reversed_auth)
    } else {
        // Name segments come before the reversed authority.
        format!("_lexicon.{}.{}", name_segments, reversed_auth)
    }
}
168
168
+
169
169
+
/// Parse NSID into authority and name segments
170
170
+
/// For "place.stream.key", returns ("place.stream", "key")
171
171
+
/// For "app.bsky.actor.profile", returns ("app.bsky", "actor.profile")
172
172
+
pub fn parse_nsid(nsid: &str) -> Result<(String, String)> {
173
173
+
// NSID format: authority.name(.name)*
174
174
+
// Authority is first 2 segments (reversed domain)
175
175
+
let parts: Vec<&str> = nsid.split('.').collect();
176
176
+
177
177
+
if parts.len() < 2 {
178
178
+
return Err(LexiconFetcherError::InvalidNsid(format!(
179
179
+
"NSID must have at least 2 segments: {}",
180
180
+
nsid
181
181
+
)));
182
182
+
}
183
183
+
184
184
+
// Authority is first 2 segments
185
185
+
let authority = format!("{}.{}", parts[0], parts[1]);
186
186
+
187
187
+
// Name segments are everything after the authority
188
188
+
let name_segments = if parts.len() > 2 {
189
189
+
parts[2..].join(".")
190
190
+
} else {
191
191
+
String::new()
192
192
+
};
193
193
+
194
194
+
Ok((authority, name_segments))
195
195
+
}
196
196
+
197
197
+
/// Trait for HTTP client - allows mocking in tests.
#[async_trait]
pub trait HttpClient: Send + Sync {
    /// Fetch a single lexicon by NSID from a DID's server.
    async fn fetch_lexicon(&self, did: &str, nsid: &str) -> Result<serde_json::Value>;

    /// Fetch all lexicons matching a pattern (e.g., "place.stream.*").
    ///
    /// Returns (nsid, lexicon) pairs; an empty Vec when nothing matches.
    async fn fetch_lexicons_pattern(
        &self,
        did: &str,
        pattern: &str,
    ) -> Result<Vec<(String, serde_json::Value)>>;
}
210
210
+
211
211
+
/// Real HTTP client using reqwest.
pub struct RealHttpClient {
    client: reqwest::Client,
}

impl RealHttpClient {
    /// Create a client with reqwest's default configuration.
    pub fn new() -> Self {
        Self {
            client: reqwest::Client::new(),
        }
    }

    /// Wrap an existing reqwest client (e.g. one with custom timeouts).
    pub fn with_client(client: reqwest::Client) -> Self {
        Self { client }
    }
}

impl Default for RealHttpClient {
    fn default() -> Self {
        Self::new()
    }
}
233
233
+
234
234
+
#[async_trait]
impl HttpClient for RealHttpClient {
    // Lists the DID's entire com.atproto.lexicon.schema collection and
    // scans it for the requested NSID.
    // NOTE(review): this downloads every record even for a single-NSID
    // fetch — acceptable for small repos, worth revisiting for large ones.
    async fn fetch_lexicon(&self, did: &str, nsid: &str) -> Result<serde_json::Value> {
        // Fetch all records from the DID's repo
        let records = self.fetch_records_from_did(did).await?;

        // Find the specific NSID
        for record in records {
            let record_nsid = extract_nsid_from_record(&record)?;
            if record_nsid == nsid {
                return Ok(record.value);
            }
        }

        Err(LexiconFetcherError::LexiconNotFound(format!(
            "Lexicon {} not found in repo {}",
            nsid, did
        )))
    }

    // Pattern semantics: exact NSID, "base.*" (all descendants) or
    // "base._" (direct children only). Mirrors MockHttpClient's matching.
    async fn fetch_lexicons_pattern(
        &self,
        did: &str,
        pattern: &str,
    ) -> Result<Vec<(String, serde_json::Value)>> {
        // Fetch all records from the DID's repo
        let records = self.fetch_records_from_did(did).await?;

        let mut results = Vec::new();

        // Handle exact match (no wildcard)
        if !pattern.contains('*') && !pattern.contains('_') {
            for record in records {
                let record_nsid = extract_nsid_from_record(&record)?;
                if record_nsid == pattern {
                    results.push((record_nsid, record.value));
                }
            }
            return Ok(results);
        }

        // Handle wildcard patterns
        if pattern.ends_with(".*") {
            // "*" matches EVERYTHING under the base at any depth:
            // "place.stream.*" matches place.stream.chat AND place.stream.chat.profile.
            let base = pattern.strip_suffix(".*").unwrap();
            let prefix_with_dot = format!("{}.", base);

            for record in records {
                let record_nsid = extract_nsid_from_record(&record)?;
                if record_nsid.starts_with(&prefix_with_dot) {
                    results.push((record_nsid, record.value));
                }
            }
        } else if pattern.ends_with("._") {
            // "_" matches only DIRECT CHILDREN:
            // "place.stream._" matches place.stream.chat but NOT place.stream.chat.profile.
            let base = pattern.strip_suffix("._").unwrap();
            let prefix_with_dot = format!("{}.", base);

            for record in records {
                let record_nsid = extract_nsid_from_record(&record)?;

                if let Some(suffix) = record_nsid.strip_prefix(&prefix_with_dot) {
                    // Direct child = no further dots in the suffix.
                    if !suffix.contains('.') && !suffix.is_empty() {
                        results.push((record_nsid, record.value));
                    }
                }
            }
        }
        // Any other wildcard placement falls through and returns empty.

        Ok(results)
    }
}
309
309
+
310
310
+
impl RealHttpClient {
    /// Fetch all lexicon records from a DID's ATProto repository.
    ///
    /// Resolves the DID to its PDS, then pages through
    /// `com.atproto.repo.listRecords` for the
    /// `com.atproto.lexicon.schema` collection until no cursor remains.
    async fn fetch_records_from_did(&self, did: &str) -> Result<Vec<AtProtoRecord>> {
        // Resolve DID to PDS URL
        let pds_url = self.resolve_did_to_pds(did).await?;

        let mut all_records = Vec::new();
        let mut cursor: Option<String> = None;

        // Paginate through all records
        loop {
            // Append the cursor only after the first page.
            let url = if let Some(ref c) = cursor {
                format!(
                    "{}/xrpc/com.atproto.repo.listRecords?repo={}&collection=com.atproto.lexicon.schema&cursor={}",
                    pds_url, did, c
                )
            } else {
                format!(
                    "{}/xrpc/com.atproto.repo.listRecords?repo={}&collection=com.atproto.lexicon.schema",
                    pds_url, did
                )
            };

            let response = self
                .client
                .get(&url)
                .send()
                .await
                .map_err(|e| LexiconFetcherError::HttpRequestFailed(e.to_string()))?;

            if !response.status().is_success() {
                return Err(LexiconFetcherError::HttpRequestFailed(format!(
                    "HTTP {} when fetching records from {}",
                    response.status(),
                    did
                )));
            }

            let mut list_response: serde_json::Value = response
                .json()
                .await
                .map_err(|e| LexiconFetcherError::JsonParseFailed(e.to_string()))?;

            // Extract records; drain moves values out without cloning.
            if let Some(records_array) = list_response.get_mut("records") {
                if let Some(records) = records_array.as_array_mut() {
                    for record_value in records.drain(..) {
                        let record: AtProtoRecord = serde_json::from_value(record_value)
                            .map_err(|e| LexiconFetcherError::JsonParseFailed(format!("Failed to parse record: {}", e)))?;
                        all_records.push(record);
                    }
                }
            }

            // Check for pagination cursor; absence means last page.
            cursor = list_response
                .get("cursor")
                .and_then(|c| c.as_str())
                .map(|s| s.to_string());

            if cursor.is_none() {
                break;
            }
        }

        Ok(all_records)
    }

    /// Resolve a DID to its PDS base URL.
    ///
    /// Supports did:web (domain is the host) and did:plc (queried via
    /// plc.directory); any other method is an InvalidUrl error.
    async fn resolve_did_to_pds(&self, did: &str) -> Result<String> {
        // For did:web:, extract the domain
        if let Some(domain) = did.strip_prefix("did:web:") {
            return Ok(format!("https://{}", domain));
        }

        // For did:plc:, query the PLC directory
        if did.starts_with("did:plc:") {
            let url = format!("https://plc.directory/{}", did);

            let response = self
                .client
                .get(&url)
                .send()
                .await
                .map_err(|e| LexiconFetcherError::HttpRequestFailed(format!("Failed to resolve DID: {}", e)))?;

            if !response.status().is_success() {
                return Err(LexiconFetcherError::HttpRequestFailed(format!(
                    "Failed to resolve DID {}: HTTP {}",
                    did,
                    response.status()
                )));
            }

            let did_doc: serde_json::Value = response
                .json()
                .await
                .map_err(|e| LexiconFetcherError::JsonParseFailed(format!("Failed to parse DID document: {}", e)))?;

            // Extract PDS endpoint from the DID document's service array.
            if let Some(services) = did_doc.get("service").and_then(|v| v.as_array()) {
                for service in services {
                    if service.get("type").and_then(|v| v.as_str()) == Some("AtprotoPersonalDataServer") {
                        if let Some(endpoint) = service.get("serviceEndpoint").and_then(|v| v.as_str()) {
                            // Trim trailing '/' so URL joins above stay clean.
                            return Ok(endpoint.trim_end_matches('/').to_string());
                        }
                    }
                }
            }

            return Err(LexiconFetcherError::HttpRequestFailed(format!(
                "No PDS endpoint found in DID document for {}",
                did
            )));
        }

        Err(LexiconFetcherError::InvalidUrl(format!(
            "Unsupported DID format: {}",
            did
        )))
    }
}
432
432
+
433
433
+
/// Mock HTTP client for testing.
///
/// Clones share the same lexicon table via Arc.
#[derive(Clone)]
pub struct MockHttpClient {
    // NSID -> canned lexicon JSON.
    lexicons: Arc<Mutex<HashMap<String, serde_json::Value>>>,
}

impl MockHttpClient {
    pub fn new() -> Self {
        Self {
            lexicons: Arc::new(Mutex::new(HashMap::new())),
        }
    }

    /// Add a mock lexicon response for a specific NSID.
    pub fn add_lexicon(&mut self, nsid: String, lexicon: serde_json::Value) {
        self.lexicons.lock().unwrap().insert(nsid, lexicon);
    }
}

impl Default for MockHttpClient {
    fn default() -> Self {
        Self::new()
    }
}
457
457
+
458
458
+
#[async_trait]
459
459
+
impl HttpClient for MockHttpClient {
460
460
+
async fn fetch_lexicon(&self, _did: &str, nsid: &str) -> Result<serde_json::Value> {
461
461
+
self.lexicons
462
462
+
.lock()
463
463
+
.unwrap()
464
464
+
.get(nsid)
465
465
+
.cloned()
466
466
+
.ok_or_else(|| LexiconFetcherError::LexiconNotFound(nsid.to_string()))
467
467
+
}
468
468
+
469
469
+
async fn fetch_lexicons_pattern(
470
470
+
&self,
471
471
+
_did: &str,
472
472
+
pattern: &str,
473
473
+
) -> Result<Vec<(String, serde_json::Value)>> {
474
474
+
let lexicons = self.lexicons.lock().unwrap();
475
475
+
let mut results = Vec::new();
476
476
+
477
477
+
// Handle exact match (no wildcard)
478
478
+
if !pattern.contains('*') && !pattern.contains('_') {
479
479
+
if let Some(lexicon) = lexicons.get(pattern) {
480
480
+
results.push((pattern.to_string(), lexicon.clone()));
481
481
+
}
482
482
+
return Ok(results);
483
483
+
}
484
484
+
485
485
+
// Handle wildcard patterns
486
486
+
if pattern.ends_with(".*") {
487
487
+
// "*" matches EVERYTHING
488
488
+
// For "place.stream.*", match all: place.stream.chat, place.stream.chat.profile, etc.
489
489
+
let base = pattern.strip_suffix(".*").unwrap();
490
490
+
let prefix_with_dot = format!("{}.", base);
491
491
+
492
492
+
for (nsid, lexicon) in lexicons.iter() {
493
493
+
if nsid.starts_with(&prefix_with_dot) {
494
494
+
results.push((nsid.clone(), lexicon.clone()));
495
495
+
}
496
496
+
}
497
497
+
} else if pattern.ends_with("._") {
498
498
+
// "_" matches only DIRECT CHILDREN
499
499
+
// For "place.stream._", match place.stream.chat but NOT place.stream.chat.profile
500
500
+
let base = pattern.strip_suffix("._").unwrap();
501
501
+
let prefix_with_dot = format!("{}.", base);
502
502
+
503
503
+
for (nsid, lexicon) in lexicons.iter() {
504
504
+
if let Some(suffix) = nsid.strip_prefix(&prefix_with_dot) {
505
505
+
// Check if it's a direct child (no more dots in the suffix)
506
506
+
if !suffix.contains('.') && !suffix.is_empty() {
507
507
+
results.push((nsid.clone(), lexicon.clone()));
508
508
+
}
509
509
+
}
510
510
+
}
511
511
+
}
512
512
+
513
513
+
Ok(results)
514
514
+
}
515
515
+
}
516
516
+
517
517
+
/// Main lexicon fetcher that combines DNS resolution and HTTP fetching.
pub struct LexiconFetcher<D: DnsResolver, H: HttpClient> {
    dns_resolver: D,
    http_client: H,
}

impl<D: DnsResolver, H: HttpClient> LexiconFetcher<D, H> {
    pub fn new(dns_resolver: D, http_client: H) -> Self {
        Self {
            dns_resolver,
            http_client,
        }
    }

    /// Fetch a single lexicon by NSID.
    /// Example: "place.stream.chat.profile" -> single lexicon JSON.
    ///
    /// Rejects wildcard patterns — use [`Self::fetch_pattern`] instead.
    /// NOTE(review): any '_' in the NSID is treated as a wildcard marker,
    /// so NSIDs containing a literal underscore are rejected here.
    pub async fn fetch(&self, nsid: &str) -> Result<serde_json::Value> {
        // Check if this is a wildcard pattern
        if nsid.contains('*') || nsid.contains('_') {
            return Err(LexiconFetcherError::InvalidNsid(format!(
                "Use fetch_pattern() for wildcard patterns (* or _): {}",
                nsid
            )));
        }

        // Parse NSID into authority and name segments
        let (authority, name_segments) = parse_nsid(nsid)?;

        // Resolve DID via DNS (async)
        let did = self.dns_resolver.resolve_lexicon_did(&authority, &name_segments).await?;

        // Fetch lexicon via HTTP
        self.http_client.fetch_lexicon(&did, nsid).await
    }

    /// Fetch all lexicons matching a pattern.
    /// Examples:
    /// - "place.stream.*" -> matches everything (place.stream.chat, place.stream.chat.profile, etc.)
    /// - "place.stream._" -> matches direct children only (place.stream.chat, place.stream.key, but not place.stream.chat.profile)
    pub async fn fetch_pattern(&self, pattern: &str) -> Result<Vec<(String, serde_json::Value)>> {
        // Parse pattern to extract authority
        let (authority, name_pattern) = parse_nsid(pattern)?;

        // For DNS lookup, remove wildcard suffix (both .* and ._):
        // - "place.stream.*" / "place.stream._" -> name part "" (authority-level record)
        // - "place.stream.chat.*" -> name part "chat"
        let dns_name_segments = if name_pattern == "*" || name_pattern == "_" {
            ""
        } else if let Some(pos) = name_pattern.rfind(".*") {
            &name_pattern[..pos]
        } else if let Some(pos) = name_pattern.rfind("._") {
            &name_pattern[..pos]
        } else {
            &name_pattern
        };

        // Resolve DID via DNS (async)
        let did = self.dns_resolver.resolve_lexicon_did(&authority, dns_name_segments).await?;

        // Fetch lexicons matching pattern via HTTP
        self.http_client.fetch_lexicons_pattern(&did, pattern).await
    }
}
580
580
+
581
581
+
/// Metadata about a fetched lexicon.
#[derive(Debug, Clone)]
pub struct FetchedLexicon {
    // The lexicon's NSID.
    pub nsid: String,
    // The lexicon JSON document.
    pub lexicon: serde_json::Value,
    // The DID the lexicon was fetched from (useful for lockfile tracking).
    pub did: String,
}

/// Result of fetching one or more lexicons.
#[derive(Debug)]
pub struct FetchResult {
    pub lexicons: Vec<FetchedLexicon>,
}
594
594
+
595
595
+
/// Convenience type for production use with real DNS and HTTP.
pub type ProductionLexiconFetcher = LexiconFetcher<RealDnsResolver, RealHttpClient>;

impl ProductionLexiconFetcher {
    /// Create a new production fetcher with default configuration.
    pub async fn production() -> Result<Self> {
        Ok(Self::new(RealDnsResolver::new().await?, RealHttpClient::new()))
    }
}
604
604
+
605
605
+
impl<D: DnsResolver, H: HttpClient> LexiconFetcher<D, H> {
    /// Fetch one or more lexicons and return metadata for lockfile tracking.
    /// Handles both exact NSIDs and patterns (* or _).
    pub async fn fetch_with_metadata(&self, nsid: &str) -> Result<FetchResult> {
        // Parse pattern to extract authority and name segments
        let (authority, name_segments) = parse_nsid(nsid)?;

        // For DNS lookup, remove wildcard suffix (both .* and ._).
        // NOTE(review): this duplicates the stripping logic in
        // fetch_pattern(); keep the two in sync.
        let dns_name_segments = if name_segments == "*" || name_segments == "_" {
            ""
        } else if let Some(pos) = name_segments.rfind(".*") {
            &name_segments[..pos]
        } else if let Some(pos) = name_segments.rfind("._") {
            &name_segments[..pos]
        } else {
            &name_segments
        };

        // Resolve DID via DNS (async)
        let did = self.dns_resolver.resolve_lexicon_did(&authority, dns_name_segments).await?;

        // Fetch lexicons: pattern fetch for wildcards, single fetch otherwise.
        let lexicons = if nsid.contains('*') || nsid.contains('_') {
            self.http_client.fetch_lexicons_pattern(&did, nsid).await?
        } else {
            let lexicon = self.http_client.fetch_lexicon(&did, nsid).await?;
            vec![(nsid.to_string(), lexicon)]
        };

        // Package results with metadata (every result shares the same DID).
        let fetched_lexicons = lexicons
            .into_iter()
            .map(|(nsid, lexicon)| FetchedLexicon {
                nsid,
                lexicon,
                did: did.clone(),
            })
            .collect();

        Ok(FetchResult {
            lexicons: fetched_lexicons,
        })
    }

    /// Fetch multiple NSIDs in sequence (no optimization).
    pub async fn fetch_many(&self, nsids: &[String]) -> Result<Vec<FetchResult>> {
        let mut results = Vec::new();
        for nsid in nsids {
            results.push(self.fetch_with_metadata(nsid).await?);
        }
        Ok(results)
    }

    /// Fetch multiple NSIDs with optimization to reduce network requests.
    /// Groups similar NSIDs into wildcard patterns when beneficial.
    /// Example: ["app.bsky.actor.foo", "app.bsky.actor.bar"] -> fetches "app.bsky.actor.*"
    pub async fn fetch_many_optimized(&self, nsids: &[String]) -> Result<Vec<FetchResult>> {
        use std::collections::HashSet;

        if nsids.is_empty() {
            return Ok(Vec::new());
        }

        // Convert to HashSet (dedupes) for optimization
        let nsids_set: HashSet<String> = nsids.iter().cloned().collect();

        // Optimize into minimal set of patterns
        let optimized_patterns = optimize_fetch_patterns(&nsids_set);

        // Fetch each optimized pattern sequentially.
        let mut results = Vec::new();
        for pattern in optimized_patterns {
            results.push(self.fetch_with_metadata(&pattern).await?);
        }

        Ok(results)
    }

    /// Fetch lexicon(s) from a known DID, bypassing DNS resolution.
    /// Useful when fetching from lockfile where DID is already known.
    /// Handles both exact NSIDs and patterns (* or _).
    pub async fn fetch_from_did_with_metadata(&self, did: &str, nsid: &str) -> Result<FetchResult> {
        // Fetch lexicons directly from the DID
        let lexicons = if nsid.contains('*') || nsid.contains('_') {
            self.http_client.fetch_lexicons_pattern(did, nsid).await?
        } else {
            let lexicon = self.http_client.fetch_lexicon(did, nsid).await?;
            vec![(nsid.to_string(), lexicon)]
        };

        // Package results with metadata
        let fetched_lexicons = lexicons
            .into_iter()
            .map(|(nsid, lexicon)| FetchedLexicon {
                nsid,
                lexicon,
                did: did.to_string(),
            })
            .collect();

        Ok(FetchResult {
            lexicons: fetched_lexicons,
        })
    }
}
710
710
+
711
711
+
/// Convenience type for testing with mocks
712
712
+
pub type MockLexiconFetcher = LexiconFetcher<MockDnsResolver, MockHttpClient>;
713
713
+
714
714
+
impl MockLexiconFetcher {
715
715
+
/// Create a new mock fetcher for testing
716
716
+
pub fn mock() -> Self {
717
717
+
Self::new(MockDnsResolver::new(), MockHttpClient::new())
718
718
+
}
719
719
+
}
720
720
+
721
721
+
/// Extract NSID from an ATProto record
722
722
+
fn extract_nsid_from_record(record: &AtProtoRecord) -> Result<String> {
723
723
+
// The record value should have an "id" field with the NSID
724
724
+
if let Some(id) = record.value.get("id").and_then(|v| v.as_str()) {
725
725
+
return Ok(id.to_string());
726
726
+
}
727
727
+
728
728
+
// Fallback: try to extract from URI
729
729
+
// URI format: at://did:plc:xxx/com.atproto.lexicon.schema/nsid
730
730
+
if let Some(rkey) = record.uri.split('/').last() {
731
731
+
return Ok(rkey.to_string());
732
732
+
}
733
733
+
734
734
+
Err(LexiconFetcherError::HttpRequestFailed(format!(
735
735
+
"Could not extract NSID from record: {}",
736
736
+
record.uri
737
737
+
)))
738
738
+
}
739
739
+
740
740
+
/// Optimize a set of NSIDs by collapsing them into the minimal set of fetch patterns
/// For example: ["app.bsky.actor.foo", "app.bsky.actor.bar"] -> ["app.bsky.actor.*"]
/// This function tries multiple grouping strategies to find the most efficient pattern
///
/// Grouping rules:
/// * 2+ NSIDs sharing a full namespace prefix collapse to `prefix.*`
/// * 3+ still-uncovered NSIDs under the same authority collapse to `authority.*`
/// * anything left is emitted individually
///
/// The returned patterns are sorted for deterministic output.
pub fn optimize_fetch_patterns(nsids: &std::collections::HashSet<String>) -> Vec<String> {
    use std::collections::{BTreeMap, HashSet};

    if nsids.is_empty() {
        return Vec::new();
    }

    // Strategy 1: Try grouping by authority (first 2 segments)
    // e.g., ["app.bsky.actor.foo", "app.bsky.feed.bar"] -> ["app.bsky.*"]
    // BTreeMap keeps iteration order deterministic.
    let mut authority_groups: BTreeMap<String, Vec<String>> = BTreeMap::new();
    for nsid in nsids {
        let parts: Vec<&str> = nsid.split('.').collect();
        if parts.len() >= 2 {
            let authority = format!("{}.{}", parts[0], parts[1]);
            authority_groups.entry(authority).or_default().push(nsid.clone());
        }
    }

    // Strategy 2: Try grouping by namespace prefix (all but last segment)
    // e.g., ["app.bsky.actor.foo", "app.bsky.actor.bar"] -> ["app.bsky.actor.*"]
    let mut prefix_groups: BTreeMap<String, Vec<String>> = BTreeMap::new();
    for nsid in nsids {
        let parts: Vec<&str> = nsid.split('.').collect();
        if parts.len() >= 3 {
            let prefix = parts[..parts.len() - 1].join(".");
            prefix_groups.entry(prefix).or_default().push(nsid.clone());
        }
    }

    let mut result = Vec::new();
    let mut handled_nsids = HashSet::new();

    // First pass: Apply namespace-level grouping (more specific).
    // Each NSID belongs to exactly one prefix group, so the groups are
    // disjoint and no "already handled" check is needed here (the previous
    // `!handled_nsids.contains(&group[0])` guard could never be false).
    for (prefix, group) in &prefix_groups {
        if group.len() >= 2 {
            result.push(format!("{}.*", prefix));
            handled_nsids.extend(group.iter().cloned());
        }
    }

    // Second pass: For remaining NSIDs, consider authority-level grouping
    // Only use authority wildcard if we have 3+ different namespaces under same authority
    for (authority, group) in &authority_groups {
        let unhandled: Vec<&String> = group
            .iter()
            .filter(|nsid| !handled_nsids.contains(*nsid))
            .collect();

        if unhandled.len() >= 3 {
            result.push(format!("{}.*", authority));
            handled_nsids.extend(unhandled.into_iter().cloned());
        }
    }

    // Third pass: Add remaining individual NSIDs
    for nsid in nsids {
        if !handled_nsids.contains(nsid) {
            result.push(nsid.clone());
        }
    }

    // Sort for consistent output
    result.sort();
    result
}
813
813
+
814
814
+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_construct_dns_name() {
        // (authority, name segments, expected `_lexicon.` reversed domain)
        let cases = [
            ("place.stream", "key", "_lexicon.key.stream.place"),
            ("app.bsky", "actor", "_lexicon.actor.bsky.app"),
            ("app.bsky", "actor.profile", "_lexicon.actor.profile.bsky.app"),
            ("place.stream", "", "_lexicon.stream.place"),
        ];
        for &(authority, name, expected) in cases.iter() {
            assert_eq!(construct_dns_name(authority, name), expected);
        }
    }

    #[test]
    fn test_parse_nsid() {
        // (input NSID, expected authority, expected name segments)
        let cases = [
            ("place.stream.key", "place.stream", "key"),
            ("app.bsky.actor.profile", "app.bsky", "actor.profile"),
            ("place.stream", "place.stream", ""),
        ];
        for &(input, want_auth, want_name) in cases.iter() {
            let (auth, name) = parse_nsid(input).unwrap();
            assert_eq!(auth, want_auth);
            assert_eq!(name, want_name);
        }

        // Fewer than two segments is not a valid NSID.
        assert!(parse_nsid("invalid").is_err());
    }

    #[tokio::test]
    async fn test_mock_dns_resolver() {
        let mut resolver = MockDnsResolver::new();
        resolver.add_record("place.stream", "key", "did:plc:test123".to_string());

        // A registered record resolves to its DID.
        assert_eq!(
            resolver.resolve_lexicon_did("place.stream", "key").await.unwrap(),
            "did:plc:test123"
        );

        // An unregistered name fails.
        assert!(resolver.resolve_lexicon_did("place.stream", "notfound").await.is_err());
    }

    #[tokio::test]
    async fn test_mock_dns_resolver_from_nsid() {
        let mut resolver = MockDnsResolver::new();
        resolver
            .add_record_from_nsid("app.bsky.actor.profile", "did:plc:bsky123".to_string())
            .unwrap();

        // The full NSID is split into authority + name for the lookup.
        assert_eq!(
            resolver
                .resolve_lexicon_did("app.bsky", "actor.profile")
                .await
                .unwrap(),
            "did:plc:bsky123"
        );
    }
}
+286
mlf-lexicon-fetcher/tests/dns_scenarios.rs
···
1
1
+
// Comprehensive DNS resolver tests covering various scenarios
//
// All tests use MockDnsResolver; no real DNS traffic is generated.

use mlf_lexicon_fetcher::{DnsResolver, MockDnsResolver};

// A registered record resolves to its DID.
#[tokio::test]
async fn test_successful_lookup() {
    let mut resolver = MockDnsResolver::new();
    resolver.add_record("place.stream", "key", "did:plc:abc123def456".to_string());

    let result = resolver.resolve_lexicon_did("place.stream", "key").await;
    assert!(result.is_ok());
    assert_eq!(result.unwrap(), "did:plc:abc123def456");
}

// Missing records surface as errors mentioning the mock lookup failure.
#[tokio::test]
async fn test_dns_record_not_found() {
    let resolver = MockDnsResolver::new();
    let result = resolver.resolve_lexicon_did("nonexistent.domain", "test").await;
    assert!(result.is_err());
    assert!(result.unwrap_err().to_string().contains("No mock record found"));
}

// Records under distinct authorities resolve independently.
#[tokio::test]
async fn test_multiple_authority_types() {
    let mut resolver = MockDnsResolver::new();

    // app.bsky authority
    resolver.add_record("app.bsky", "actor", "did:plc:bsky001".to_string());

    // place.stream authority
    resolver.add_record("place.stream", "chat", "did:plc:stream001".to_string());

    // com.atproto authority
    resolver.add_record("com.atproto", "repo", "did:plc:atproto001".to_string());

    assert_eq!(
        resolver.resolve_lexicon_did("app.bsky", "actor").await.unwrap(),
        "did:plc:bsky001"
    );
    assert_eq!(
        resolver.resolve_lexicon_did("place.stream", "chat").await.unwrap(),
        "did:plc:stream001"
    );
    assert_eq!(
        resolver.resolve_lexicon_did("com.atproto", "repo").await.unwrap(),
        "did:plc:atproto001"
    );
}

// Name segments of varying depth under the same authority stay distinct.
#[tokio::test]
async fn test_nested_name_segments() {
    let mut resolver = MockDnsResolver::new();

    // Single segment
    resolver.add_record("app.bsky", "actor", "did:plc:single".to_string());

    // Two segments
    resolver.add_record("app.bsky", "actor.profile", "did:plc:double".to_string());

    // Three segments
    resolver.add_record("app.bsky", "actor.profile.detailed", "did:plc:triple".to_string());

    assert_eq!(
        resolver.resolve_lexicon_did("app.bsky", "actor").await.unwrap(),
        "did:plc:single"
    );
    assert_eq!(
        resolver.resolve_lexicon_did("app.bsky", "actor.profile").await.unwrap(),
        "did:plc:double"
    );
    assert_eq!(
        resolver.resolve_lexicon_did("app.bsky", "actor.profile.detailed").await.unwrap(),
        "did:plc:triple"
    );
}

// An empty name is accepted: the record lives at the authority itself.
#[tokio::test]
async fn test_empty_name_segments() {
    let mut resolver = MockDnsResolver::new();

    // Authority only (no name segments)
    resolver.add_record("place.stream", "", "did:plc:root".to_string());

    assert_eq!(
        resolver.resolve_lexicon_did("place.stream", "").await.unwrap(),
        "did:plc:root"
    );
}

// did:web DIDs are returned verbatim, same as did:plc.
#[tokio::test]
async fn test_did_web_format() {
    let mut resolver = MockDnsResolver::new();

    resolver.add_record("example.com", "api", "did:web:example.com".to_string());

    assert_eq!(
        resolver.resolve_lexicon_did("example.com", "api").await.unwrap(),
        "did:web:example.com"
    );
}

// Realistic did:plc identifiers round-trip unchanged.
#[tokio::test]
async fn test_did_plc_format() {
    let mut resolver = MockDnsResolver::new();

    // Real-world style PLC DIDs
    resolver.add_record(
        "app.bsky",
        "feed",
        "did:plc:z72i7hdynmk6r22z27h6tvur".to_string()
    );

    let did = resolver.resolve_lexicon_did("app.bsky", "feed").await.unwrap();
    assert!(did.starts_with("did:plc:"));
    assert_eq!(did.len(), 32); // "did:plc:" (8) + 24 chars
}

// add_record_from_nsid splits a full NSID into authority + name internally.
#[tokio::test]
async fn test_add_record_from_nsid() {
    let mut resolver = MockDnsResolver::new();

    // Add using full NSID
    resolver.add_record_from_nsid("place.stream.key", "did:plc:test".to_string()).unwrap();
    resolver.add_record_from_nsid("app.bsky.actor.profile", "did:plc:bsky".to_string()).unwrap();

    assert_eq!(
        resolver.resolve_lexicon_did("place.stream", "key").await.unwrap(),
        "did:plc:test"
    );
    assert_eq!(
        resolver.resolve_lexicon_did("app.bsky", "actor.profile").await.unwrap(),
        "did:plc:bsky"
    );
}

// NSIDs need at least authority.name structure; one segment is rejected.
#[tokio::test]
async fn test_invalid_nsid_format() {
    let mut resolver = MockDnsResolver::new();

    // NSID with only one segment
    let result = resolver.add_record_from_nsid("invalid", "did:plc:test".to_string());
    assert!(result.is_err());
    assert!(result.unwrap_err().to_string().contains("at least 2 segments"));
}

// The mock resolver keys records case-sensitively.
#[tokio::test]
async fn test_case_sensitivity() {
    let mut resolver = MockDnsResolver::new();

    // ATProto NSIDs are case-sensitive
    resolver.add_record("App.Bsky", "Actor", "did:plc:uppercase".to_string());
    resolver.add_record("app.bsky", "actor", "did:plc:lowercase".to_string());

    // These should be treated as different domains
    assert_eq!(
        resolver.resolve_lexicon_did("App.Bsky", "Actor").await.unwrap(),
        "did:plc:uppercase"
    );
    assert_eq!(
        resolver.resolve_lexicon_did("app.bsky", "actor").await.unwrap(),
        "did:plc:lowercase"
    );
}

// Shared resolver handles concurrent lookups via Arc across spawned tasks.
#[tokio::test]
async fn test_concurrent_lookups() {
    use std::sync::Arc;

    let mut resolver = MockDnsResolver::new();
    resolver.add_record("app.bsky", "feed", "did:plc:concurrent".to_string());

    let resolver = Arc::new(resolver);
    let mut handles = vec![];

    // Spawn 10 tasks doing concurrent lookups
    for _ in 0..10 {
        let resolver_clone = Arc::clone(&resolver);
        let handle = tokio::spawn(async move {
            resolver_clone.resolve_lexicon_did("app.bsky", "feed").await.unwrap()
        });
        handles.push(handle);
    }

    // All should succeed
    for handle in handles {
        assert_eq!(handle.await.unwrap(), "did:plc:concurrent");
    }
}

// Several lexicons under the same namespace may share one DID.
#[tokio::test]
async fn test_wildcard_namespace_scenarios() {
    let mut resolver = MockDnsResolver::new();

    // Simulate multiple lexicons under same namespace
    resolver.add_record("app.bsky", "actor.defs", "did:plc:bsky".to_string());
    resolver.add_record("app.bsky", "actor.profile", "did:plc:bsky".to_string());
    resolver.add_record("app.bsky", "feed.post", "did:plc:bsky".to_string());
    resolver.add_record("app.bsky", "feed.like", "did:plc:bsky".to_string());

    // All should resolve to the same DID
    assert_eq!(
        resolver.resolve_lexicon_did("app.bsky", "actor.defs").await.unwrap(),
        "did:plc:bsky"
    );
    assert_eq!(
        resolver.resolve_lexicon_did("app.bsky", "feed.post").await.unwrap(),
        "did:plc:bsky"
    );
}

// construct_dns_name reverses segments under a `_lexicon.` prefix.
#[tokio::test]
async fn test_dns_name_construction() {
    use mlf_lexicon_fetcher::construct_dns_name;

    // Test various NSID patterns
    assert_eq!(
        construct_dns_name("place.stream", "key"),
        "_lexicon.key.stream.place"
    );
    assert_eq!(
        construct_dns_name("app.bsky", "actor"),
        "_lexicon.actor.bsky.app"
    );
    assert_eq!(
        construct_dns_name("com.atproto", "repo.strongRef"),
        "_lexicon.repo.strongRef.atproto.com"
    );
}

// parse_nsid splits an NSID into (authority, name) at the second dot.
#[tokio::test]
async fn test_nsid_parsing() {
    use mlf_lexicon_fetcher::parse_nsid;

    // Test various NSID formats
    let (auth, name) = parse_nsid("place.stream.key").unwrap();
    assert_eq!(auth, "place.stream");
    assert_eq!(name, "key");

    let (auth, name) = parse_nsid("app.bsky.actor.profile").unwrap();
    assert_eq!(auth, "app.bsky");
    assert_eq!(name, "actor.profile");

    let (auth, name) = parse_nsid("com.atproto.repo.strongRef").unwrap();
    assert_eq!(auth, "com.atproto");
    assert_eq!(name, "repo.strongRef");
}

// The mock performs no DID validation: empty strings pass through.
#[tokio::test]
async fn test_edge_case_empty_did() {
    let mut resolver = MockDnsResolver::new();

    // Empty DID should work (though not valid in practice)
    resolver.add_record("test.com", "api", "".to_string());

    assert_eq!(
        resolver.resolve_lexicon_did("test.com", "api").await.unwrap(),
        ""
    );
}

// Deeply nested name segments resolve like any other name.
#[tokio::test]
async fn test_very_long_nsid() {
    let mut resolver = MockDnsResolver::new();

    // Very long name segments
    let long_name = "very.long.deeply.nested.namespace.path.to.definition";
    resolver.add_record("app.bsky", long_name, "did:plc:deep".to_string());

    assert_eq!(
        resolver.resolve_lexicon_did("app.bsky", long_name).await.unwrap(),
        "did:plc:deep"
    );
}

// Hyphens and digits in authority labels are handled.
#[tokio::test]
async fn test_special_characters_in_nsid() {
    let mut resolver = MockDnsResolver::new();

    // Hyphens and numbers are valid in domain names
    resolver.add_record("app-test.bsky-123", "actor", "did:plc:special".to_string());

    assert_eq!(
        resolver.resolve_lexicon_did("app-test.bsky-123", "actor").await.unwrap(),
        "did:plc:special"
    );
}
+360
mlf-lexicon-fetcher/tests/lexicon_fetching.rs
···
1
1
+
// Integration tests for full lexicon fetching flow (DNS + HTTP)
//
// Each test wires a MockDnsResolver and MockHttpClient into a LexiconFetcher;
// no network access occurs.

use mlf_lexicon_fetcher::{
    LexiconFetcher, MockDnsResolver, MockHttpClient, LexiconFetcherError,
};
use serde_json::json;

// Happy path: DNS resolves the NSID to a DID, HTTP returns the schema.
#[tokio::test]
async fn test_fetch_single_lexicon() {
    // Setup mock DNS resolver
    let mut dns_resolver = MockDnsResolver::new();
    dns_resolver.add_record("place.stream", "chat.profile", "did:plc:test123".to_string());

    // Setup mock HTTP client
    let mut http_client = MockHttpClient::new();
    let lexicon_json = json!({
        "lexicon": 1,
        "id": "place.stream.chat.profile",
        "defs": {
            "main": {
                "type": "record",
                "description": "A chat profile record"
            }
        }
    });
    http_client.add_lexicon("place.stream.chat.profile".to_string(), lexicon_json.clone());

    // Create fetcher
    let fetcher = LexiconFetcher::new(dns_resolver, http_client);

    // Fetch single lexicon
    let result = fetcher.fetch("place.stream.chat.profile").await;
    assert!(result.is_ok());
    let fetched = result.unwrap();
    assert_eq!(fetched.get("id").unwrap().as_str().unwrap(), "place.stream.chat.profile");
}

// A '*' pattern returns every lexicon under the prefix.
#[tokio::test]
async fn test_fetch_pattern_multiple_lexicons() {
    // Setup mock DNS resolver
    let mut dns_resolver = MockDnsResolver::new();
    dns_resolver.add_record("place.stream", "chat", "did:plc:test123".to_string());

    // Setup mock HTTP client with multiple lexicons
    let mut http_client = MockHttpClient::new();

    let profile_lexicon = json!({
        "lexicon": 1,
        "id": "place.stream.chat.profile",
        "defs": { "main": { "type": "record" } }
    });
    let message_lexicon = json!({
        "lexicon": 1,
        "id": "place.stream.chat.message",
        "defs": { "main": { "type": "record" } }
    });
    let room_lexicon = json!({
        "lexicon": 1,
        "id": "place.stream.chat.room",
        "defs": { "main": { "type": "record" } }
    });

    http_client.add_lexicon("place.stream.chat.profile".to_string(), profile_lexicon);
    http_client.add_lexicon("place.stream.chat.message".to_string(), message_lexicon);
    http_client.add_lexicon("place.stream.chat.room".to_string(), room_lexicon);

    // Create fetcher
    let fetcher = LexiconFetcher::new(dns_resolver, http_client);

    // Fetch pattern
    let result = fetcher.fetch_pattern("place.stream.chat.*").await;
    assert!(result.is_ok());
    let lexicons = result.unwrap();

    // Should get 3 lexicons
    assert_eq!(lexicons.len(), 3);

    // Check all NSIDs are present
    let nsids: Vec<&str> = lexicons.iter().map(|(nsid, _)| nsid.as_str()).collect();
    assert!(nsids.contains(&"place.stream.chat.profile"));
    assert!(nsids.contains(&"place.stream.chat.message"));
    assert!(nsids.contains(&"place.stream.chat.room"));
}

// DNS succeeds but the HTTP side has no schema: LexiconNotFound.
#[tokio::test]
async fn test_fetch_lexicon_not_found() {
    // Setup mock DNS resolver
    let mut dns_resolver = MockDnsResolver::new();
    dns_resolver.add_record("place.stream", "chat.profile", "did:plc:test123".to_string());

    // Setup mock HTTP client (but don't add the lexicon)
    let http_client = MockHttpClient::new();

    // Create fetcher
    let fetcher = LexiconFetcher::new(dns_resolver, http_client);

    // Try to fetch non-existent lexicon
    let result = fetcher.fetch("place.stream.chat.profile").await;
    assert!(result.is_err());

    match result {
        Err(LexiconFetcherError::LexiconNotFound(nsid)) => {
            assert_eq!(nsid, "place.stream.chat.profile");
        }
        _ => panic!("Expected LexiconNotFound error"),
    }
}

// No DNS record at all: LookupFailed carries the reversed `_lexicon.` domain.
#[tokio::test]
async fn test_fetch_dns_lookup_failed() {
    // Setup mock DNS resolver (but don't add the record)
    let dns_resolver = MockDnsResolver::new();

    // Setup mock HTTP client
    let http_client = MockHttpClient::new();

    // Create fetcher
    let fetcher = LexiconFetcher::new(dns_resolver, http_client);

    // Try to fetch with no DNS record
    let result = fetcher.fetch("place.stream.chat.profile").await;
    assert!(result.is_err());

    match result {
        Err(LexiconFetcherError::LookupFailed { domain, .. }) => {
            assert_eq!(domain, "_lexicon.chat.profile.stream.place");
        }
        _ => panic!("Expected LookupFailed error"),
    }
}

// fetch() rejects wildcards and directs callers to fetch_pattern().
#[tokio::test]
async fn test_fetch_with_wildcard_returns_error() {
    let dns_resolver = MockDnsResolver::new();
    let http_client = MockHttpClient::new();
    let fetcher = LexiconFetcher::new(dns_resolver, http_client);

    // Try to use wildcard with fetch() instead of fetch_pattern()
    let result = fetcher.fetch("place.stream.*").await;
    assert!(result.is_err());

    match result {
        Err(LexiconFetcherError::InvalidNsid(msg)) => {
            assert!(msg.contains("fetch_pattern()"));
        }
        _ => panic!("Expected InvalidNsid error"),
    }
}

// A pattern with zero matches is Ok with an empty result, not an error.
#[tokio::test]
async fn test_fetch_pattern_empty_results() {
    // Setup mock DNS resolver
    let mut dns_resolver = MockDnsResolver::new();
    dns_resolver.add_record("place.stream", "chat", "did:plc:test123".to_string());

    // Setup mock HTTP client with no matching lexicons
    let mut http_client = MockHttpClient::new();
    http_client.add_lexicon("place.stream.other.thing".to_string(), json!({}));

    // Create fetcher
    let fetcher = LexiconFetcher::new(dns_resolver, http_client);

    // Fetch pattern that matches nothing
    let result = fetcher.fetch_pattern("place.stream.chat.*").await;
    assert!(result.is_ok());
    let lexicons = result.unwrap();
    assert_eq!(lexicons.len(), 0);
}

// One fetcher serves NSIDs from different authorities/DIDs.
#[tokio::test]
async fn test_multiple_authorities() {
    // Setup mock DNS resolver with multiple authorities
    let mut dns_resolver = MockDnsResolver::new();
    dns_resolver.add_record("place.stream", "chat.profile", "did:plc:stream123".to_string());
    dns_resolver.add_record("app.bsky", "actor.profile", "did:plc:bsky456".to_string());

    // Setup mock HTTP client
    let mut http_client = MockHttpClient::new();
    http_client.add_lexicon(
        "place.stream.chat.profile".to_string(),
        json!({"id": "place.stream.chat.profile"}),
    );
    http_client.add_lexicon(
        "app.bsky.actor.profile".to_string(),
        json!({"id": "app.bsky.actor.profile"}),
    );

    // Create fetcher
    let fetcher = LexiconFetcher::new(dns_resolver, http_client);

    // Fetch from both authorities
    let result1 = fetcher.fetch("place.stream.chat.profile").await;
    assert!(result1.is_ok());

    let result2 = fetcher.fetch("app.bsky.actor.profile").await;
    assert!(result2.is_ok());
}

// Deeply nested NSIDs flow through DNS + HTTP unchanged.
#[tokio::test]
async fn test_nested_name_segments() {
    // Setup mock DNS resolver
    let mut dns_resolver = MockDnsResolver::new();
    dns_resolver.add_record(
        "place.stream",
        "chat.message.attachments.image",
        "did:plc:test123".to_string(),
    );

    // Setup mock HTTP client
    let mut http_client = MockHttpClient::new();
    http_client.add_lexicon(
        "place.stream.chat.message.attachments.image".to_string(),
        json!({"id": "place.stream.chat.message.attachments.image"}),
    );

    // Create fetcher
    let fetcher = LexiconFetcher::new(dns_resolver, http_client);

    // Fetch deeply nested lexicon
    let result = fetcher.fetch("place.stream.chat.message.attachments.image").await;
    assert!(result.is_ok());
}

// The fetcher is shareable via Arc across concurrent tokio tasks.
#[tokio::test]
async fn test_concurrent_fetches() {
    use std::sync::Arc;
    use tokio::task;

    // Setup mock DNS resolver
    let mut dns_resolver = MockDnsResolver::new();
    dns_resolver.add_record("place.stream", "chat.profile", "did:plc:test123".to_string());

    // Setup mock HTTP client
    let mut http_client = MockHttpClient::new();
    http_client.add_lexicon(
        "place.stream.chat.profile".to_string(),
        json!({"id": "place.stream.chat.profile"}),
    );

    // Create fetcher and wrap in Arc
    let fetcher = Arc::new(LexiconFetcher::new(dns_resolver, http_client));

    // Spawn multiple concurrent fetch tasks
    let mut handles = vec![];
    for _ in 0..10 {
        let fetcher_clone = Arc::clone(&fetcher);
        let handle = task::spawn(async move {
            fetcher_clone.fetch("place.stream.chat.profile").await
        });
        handles.push(handle);
    }

    // All should succeed
    for handle in handles {
        let result = handle.await.unwrap();
        assert!(result.is_ok());
    }
}

// Pattern matching is prefix-scoped: unrelated namespaces are excluded.
#[tokio::test]
async fn test_pattern_prefix_matching() {
    // Setup mock DNS resolver
    let mut dns_resolver = MockDnsResolver::new();
    dns_resolver.add_record("app.bsky", "feed", "did:plc:test123".to_string());

    // Setup mock HTTP client
    let mut http_client = MockHttpClient::new();
    http_client.add_lexicon("app.bsky.feed.post".to_string(), json!({"id": "app.bsky.feed.post"}));
    http_client.add_lexicon("app.bsky.feed.like".to_string(), json!({"id": "app.bsky.feed.like"}));
    http_client.add_lexicon("app.bsky.feed.repost".to_string(), json!({"id": "app.bsky.feed.repost"}));
    http_client.add_lexicon("app.bsky.actor.profile".to_string(), json!({"id": "app.bsky.actor.profile"}));

    // Create fetcher
    let fetcher = LexiconFetcher::new(dns_resolver, http_client);

    // Fetch only feed.* lexicons
    let result = fetcher.fetch_pattern("app.bsky.feed.*").await;
    assert!(result.is_ok());
    let lexicons = result.unwrap();

    // Should only get feed.* lexicons (not actor.profile)
    assert_eq!(lexicons.len(), 3);
    for (nsid, _) in lexicons {
        assert!(nsid.starts_with("app.bsky.feed."));
    }
}

// '_' matches direct children only, never deeper descendants.
#[tokio::test]
async fn test_underscore_wildcard_direct_children() {
    // Setup mock DNS resolver
    let mut dns_resolver = MockDnsResolver::new();
    dns_resolver.add_record("place.stream", "", "did:plc:test123".to_string());

    // Setup mock HTTP client with nested lexicons
    let mut http_client = MockHttpClient::new();

    // Direct children of place.stream
    http_client.add_lexicon("place.stream.chat".to_string(), json!({"id": "place.stream.chat"}));
    http_client.add_lexicon("place.stream.key".to_string(), json!({"id": "place.stream.key"}));
    http_client.add_lexicon("place.stream.livestream".to_string(), json!({"id": "place.stream.livestream"}));

    // Nested children (should NOT match with _)
    http_client.add_lexicon("place.stream.chat.profile".to_string(), json!({"id": "place.stream.chat.profile"}));
    http_client.add_lexicon("place.stream.chat.message".to_string(), json!({"id": "place.stream.chat.message"}));
    http_client.add_lexicon("place.stream.key.defs".to_string(), json!({"id": "place.stream.key.defs"}));

    // Create fetcher
    let fetcher = LexiconFetcher::new(dns_resolver, http_client);

    // Test underscore wildcard (direct children only)
    let result = fetcher.fetch_pattern("place.stream._").await;
    assert!(result.is_ok());
    let lexicons = result.unwrap();

    // Should only get 3 direct children
    assert_eq!(lexicons.len(), 3);

    let nsids: Vec<&str> = lexicons.iter().map(|(nsid, _)| nsid.as_str()).collect();
    assert!(nsids.contains(&"place.stream.chat"));
    assert!(nsids.contains(&"place.stream.key"));
    assert!(nsids.contains(&"place.stream.livestream"));

    // Should NOT contain nested children
    assert!(!nsids.contains(&"place.stream.chat.profile"));
    assert!(!nsids.contains(&"place.stream.chat.message"));
    assert!(!nsids.contains(&"place.stream.key.defs"));
}

// Side-by-side contrast: '*' = all descendants, '_' = direct children.
#[tokio::test]
async fn test_star_vs_underscore_wildcard() {
    // Setup mock DNS resolver
    let mut dns_resolver = MockDnsResolver::new();
    dns_resolver.add_record("place.stream", "", "did:plc:test123".to_string());

    // Setup mock HTTP client with nested lexicons
    let mut http_client = MockHttpClient::new();
    http_client.add_lexicon("place.stream.chat".to_string(), json!({"id": "place.stream.chat"}));
    http_client.add_lexicon("place.stream.chat.profile".to_string(), json!({"id": "place.stream.chat.profile"}));
    http_client.add_lexicon("place.stream.key".to_string(), json!({"id": "place.stream.key"}));

    // Create fetcher
    let fetcher = LexiconFetcher::new(dns_resolver, http_client);

    // Test star wildcard (all descendants)
    let star_result = fetcher.fetch_pattern("place.stream.*").await;
    assert!(star_result.is_ok());
    let star_lexicons = star_result.unwrap();
    assert_eq!(star_lexicons.len(), 3); // All 3 lexicons

    // Test underscore wildcard (direct children only)
    let underscore_result = fetcher.fetch_pattern("place.stream._").await;
    assert!(underscore_result.is_ok());
    let underscore_lexicons = underscore_result.unwrap();
    assert_eq!(underscore_lexicons.len(), 2); // Only direct children (chat, key)

    let nsids: Vec<&str> = underscore_lexicons.iter().map(|(nsid, _)| nsid.as_str()).collect();
    assert!(nsids.contains(&"place.stream.chat"));
    assert!(nsids.contains(&"place.stream.key"));
    assert!(!nsids.contains(&"place.stream.chat.profile"));
}
+6
tests/Cargo.toml
···
12
12
mlf-lang = { path = "../mlf-lang" }
13
13
mlf-codegen = { path = "../mlf-codegen" }
14
14
mlf-diagnostics = { path = "../mlf-diagnostics" }
15
15
+
mlf-lexicon-fetcher = { path = "../mlf-lexicon-fetcher" }
15
16
serde_json = "1.0"
16
17
serde = { version = "1.0", features = ["derive"] }
17
18
toml = "0.8"
19
19
+
tokio = { version = "1", features = ["full"] }
18
20
19
21
[dev-dependencies]
20
22
# Any additional test dependencies
···
26
28
[[test]]
27
29
name = "diagnostics_integration"
28
30
path = "diagnostics_integration.rs"
31
31
+
32
32
+
[[test]]
33
33
+
name = "lexicon_fetcher_integration"
34
34
+
path = "lexicon_fetcher_integration.rs"
+1
-1
tests/codegen_integration.rs
···
27
27
})
28
28
.collect();
29
29
30
30
-
let (passed, failed) = test_utils::run_and_report_tests::<fn(&str) -> Result<(), String>>(tests, "Codegen");
30
30
+
let (_passed, failed) = test_utils::run_and_report_tests::<fn(&str) -> Result<(), String>>(tests, "Codegen");
31
31
32
32
if !failed.is_empty() {
33
33
panic!(
+7
-4
tests/diagnostics_integration.rs
···
5
5
use mlf_integration_tests::test_utils;
6
6
use mlf_lang::{parser::parse_lexicon, Workspace};
7
7
use serde::Deserialize;
8
8
-
use serde_json::Value;
9
8
use std::fs;
10
9
use std::path::Path;
11
10
···
17
16
18
17
#[derive(Debug, Deserialize)]
19
18
struct ExpectedError {
19
19
+
#[allow(dead_code)]
20
20
code: String,
21
21
message: String,
22
22
#[serde(default)]
23
23
+
#[allow(dead_code)]
23
24
span: Option<ExpectedSpan>,
24
25
}
25
26
26
27
#[derive(Debug, Deserialize)]
27
28
struct ExpectedSpan {
29
29
+
#[allow(dead_code)]
28
30
start: usize,
31
31
+
#[allow(dead_code)]
29
32
end: usize,
30
33
}
31
34
···
47
50
})
48
51
.collect();
49
52
50
50
-
let (passed, failed) = test_utils::run_and_report_tests::<fn(&str) -> Result<(), String>>(tests, "Diagnostics");
53
53
+
let (_passed, failed) = test_utils::run_and_report_tests::<fn(&str) -> Result<(), String>>(tests, "Diagnostics");
51
54
52
55
if !failed.is_empty() {
53
56
panic!(
···
96
99
};
97
100
98
101
// 5. Create diagnostic
99
99
-
let diagnostic = ValidationDiagnostic::new(
102
102
+
let _diagnostic = ValidationDiagnostic::new(
100
103
"input.mlf".to_string(),
101
104
input.clone(),
102
105
namespace.clone(),
···
135
138
let actual_error = errors_in_module[i];
136
139
137
140
// Check error code
138
138
-
let actual_code = mlf_diagnostics::get_error_module_namespace_str(actual_error);
141
141
+
let _actual_code = mlf_diagnostics::get_error_module_namespace_str(actual_error);
139
142
140
143
// Format the error message
141
144
let actual_message = format!("{:?}", actual_error);
+4
tests/lexicon_fetcher/dns/basic_resolution/expected.json
···
1
1
+
{
2
2
+
"status": "success",
3
3
+
"did": "did:plc:test123"
4
4
+
}
+4
tests/lexicon_fetcher/dns/basic_resolution/test.toml
···
1
1
+
[test]
2
2
+
description = "Basic DNS resolution for place.stream.chat.profile"
3
3
+
nsid = "place.stream.chat.profile"
4
4
+
did = "did:plc:test123"
+4
tests/lexicon_fetcher/dns/missing_record/expected.json
···
1
1
+
{
2
2
+
"status": "error",
3
3
+
"error": "lookup_failed"
4
4
+
}
+4
tests/lexicon_fetcher/dns/missing_record/test.toml
···
1
1
+
[test]
2
2
+
description = "DNS lookup for non-existent record"
3
3
+
nsid = "nonexistent.domain.test"
4
4
+
should_fail = true
+4
tests/lexicon_fetcher/dns/nested_segments/expected.json
···
1
1
+
{
2
2
+
"status": "success",
3
3
+
"did": "did:plc:nested789"
4
4
+
}
+4
tests/lexicon_fetcher/dns/nested_segments/test.toml
···
1
1
+
[test]
2
2
+
description = "DNS resolution for deeply nested NSID app.bsky.actor.profile.detailed"
3
3
+
nsid = "app.bsky.actor.profile.detailed"
4
4
+
did = "did:plc:nested789"
+4
tests/lexicon_fetcher/dns/wildcard_pattern/expected.json
···
1
1
+
{
2
2
+
"status": "success",
3
3
+
"did": "did:plc:test456"
4
4
+
}
+4
tests/lexicon_fetcher/dns/wildcard_pattern/test.toml
···
1
1
+
[test]
2
2
+
description = "DNS resolution for wildcard pattern place.stream.chat.*"
3
3
+
nsid = "place.stream.chat.*"
4
4
+
did = "did:plc:test456"
+220
tests/lexicon_fetcher_integration.rs
···
1
1
+
// Workspace-level integration tests for lexicon fetcher
2
2
+
// Tests mlf-lexicon-fetcher working with mocks
3
3
+
4
4
+
use mlf_lexicon_fetcher::{DnsResolver, LexiconFetcher, MockDnsResolver, MockHttpClient};
5
5
+
use serde::{Deserialize, Serialize};
6
6
+
use std::fs;
7
7
+
use std::path::Path;
8
8
+
9
9
+
#[derive(Debug, Deserialize)]
10
10
+
struct TestConfig {
11
11
+
test: TestMetadata,
12
12
+
}
13
13
+
14
14
+
#[derive(Debug, Deserialize)]
15
15
+
struct TestMetadata {
16
16
+
description: String,
17
17
+
nsid: String,
18
18
+
#[serde(default)]
19
19
+
did: Option<String>,
20
20
+
#[serde(default)]
21
21
+
should_fail: bool,
22
22
+
}
23
23
+
24
24
+
#[derive(Debug, Deserialize, Serialize, PartialEq)]
25
25
+
struct ExpectedResult {
26
26
+
status: String,
27
27
+
#[serde(skip_serializing_if = "Option::is_none")]
28
28
+
did: Option<String>,
29
29
+
#[serde(skip_serializing_if = "Option::is_none")]
30
30
+
error: Option<String>,
31
31
+
}
32
32
+
33
33
+
#[tokio::test]
34
34
+
async fn lexicon_fetcher_dns_tests() {
35
35
+
let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
36
36
+
let test_base = format!("{}/lexicon_fetcher/dns", manifest_dir);
37
37
+
let test_dirs = discover_test_dirs(&test_base);
38
38
+
39
39
+
let mut tests = Vec::new();
40
40
+
41
41
+
for test_dir in test_dirs {
42
42
+
let test_name = format!(
43
43
+
"lexicon_fetcher/dns/{}",
44
44
+
Path::new(&test_dir).file_name().unwrap().to_str().unwrap()
45
45
+
);
46
46
+
let result = run_dns_test(&test_dir).await;
47
47
+
tests.push((test_name, result));
48
48
+
}
49
49
+
50
50
+
let (passed, failed) = run_and_report_tests(tests, "Lexicon Fetcher DNS");
51
51
+
52
52
+
if !failed.is_empty() {
53
53
+
panic!(
54
54
+
"\nFailed tests:\n{}",
55
55
+
failed
56
56
+
.iter()
57
57
+
.map(|(name, err)| format!(" - {}: {}", name, err))
58
58
+
.collect::<Vec<_>>()
59
59
+
.join("\n")
60
60
+
);
61
61
+
}
62
62
+
63
63
+
assert!(passed > 0, "No tests were run");
64
64
+
}
65
65
+
66
66
+
async fn run_dns_test(test_dir: &str) -> Result<(), String> {
67
67
+
// Load test configuration
68
68
+
let config_path = format!("{}/test.toml", test_dir);
69
69
+
let config_str = fs::read_to_string(&config_path)
70
70
+
.map_err(|e| format!("Failed to read test.toml: {}", e))?;
71
71
+
let config: TestConfig = toml::from_str(&config_str)
72
72
+
.map_err(|e| format!("Failed to parse test.toml: {}", e))?;
73
73
+
74
74
+
// Load expected output
75
75
+
let expected_path = format!("{}/expected.json", test_dir);
76
76
+
let expected_str = fs::read_to_string(&expected_path)
77
77
+
.map_err(|e| format!("Failed to read expected.json: {}", e))?;
78
78
+
let expected: ExpectedResult = serde_json::from_str(&expected_str)
79
79
+
.map_err(|e| format!("Failed to parse expected.json: {}", e))?;
80
80
+
81
81
+
// Setup mocks
82
82
+
let mut dns_resolver = MockDnsResolver::new();
83
83
+
let http_client = MockHttpClient::new();
84
84
+
85
85
+
// Add DNS record if expected to succeed
86
86
+
if expected.status == "success" {
87
87
+
let (authority, name_segments) = parse_nsid(&config.test.nsid)?;
88
88
+
let did = config.test.did.clone().ok_or_else(|| "Missing DID in test config".to_string())?;
89
89
+
90
90
+
// For wildcard patterns, remove the .* suffix
91
91
+
let dns_name = if name_segments.ends_with(".*") {
92
92
+
name_segments.strip_suffix(".*").unwrap()
93
93
+
} else {
94
94
+
&name_segments
95
95
+
};
96
96
+
97
97
+
dns_resolver.add_record(&authority, dns_name, did.clone());
98
98
+
}
99
99
+
100
100
+
// Create fetcher
101
101
+
let fetcher = LexiconFetcher::new(dns_resolver, http_client);
102
102
+
103
103
+
// Test DNS resolution by attempting to extract the DID
104
104
+
// We can't directly test DNS resolution without HTTP, so we test the full flow
105
105
+
// but expect it to fail at HTTP stage (which is fine for DNS testing)
106
106
+
let (authority, name_segments) = parse_nsid(&config.test.nsid)?;
107
107
+
108
108
+
// For wildcard patterns, strip the .* suffix
109
109
+
let dns_name = if name_segments.ends_with(".*") {
110
110
+
name_segments.strip_suffix(".*").unwrap().to_string()
111
111
+
} else {
112
112
+
name_segments
113
113
+
};
114
114
+
115
115
+
// Create a test resolver directly to check DNS
116
116
+
let mut test_dns = MockDnsResolver::new();
117
117
+
if expected.status == "success" {
118
118
+
let did = config.test.did.clone().ok_or_else(|| "Missing DID in test config".to_string())?;
119
119
+
test_dns.add_record(&authority, &dns_name, did.clone());
120
120
+
121
121
+
// Verify DNS resolution
122
122
+
match test_dns.resolve_lexicon_did(&authority, &dns_name).await {
123
123
+
Ok(resolved_did) => {
124
124
+
if resolved_did != did {
125
125
+
return Err(format!("DID mismatch: expected {}, got {}", did, resolved_did));
126
126
+
}
127
127
+
// Check against expected
128
128
+
if let Some(expected_did) = &expected.did {
129
129
+
if &resolved_did != expected_did {
130
130
+
return Err(format!("DID mismatch with expected: expected {}, got {}", expected_did, resolved_did));
131
131
+
}
132
132
+
}
133
133
+
}
134
134
+
Err(e) => return Err(format!("DNS resolution failed: {:?}", e)),
135
135
+
}
136
136
+
} else {
137
137
+
// Expected to fail
138
138
+
match test_dns.resolve_lexicon_did(&authority, &dns_name).await {
139
139
+
Ok(_) => return Err("Expected DNS lookup to fail, but it succeeded".to_string()),
140
140
+
Err(_) => {
141
141
+
// Success - it failed as expected
142
142
+
}
143
143
+
}
144
144
+
}
145
145
+
146
146
+
Ok(())
147
147
+
}
148
148
+
149
149
+
/// Splits an NSID into `(authority, name_segments)`.
///
/// The authority is the first two dot-separated segments; the remainder
/// (if any) is returned joined with dots. A trailing `.*` wildcard on the
/// input is preserved on the name-segment side ("chat.*", or ".*" when no
/// name segments exist). Errors on inputs with fewer than two segments.
fn parse_nsid(nsid: &str) -> Result<(String, String), String> {
    let wildcard = nsid.ends_with(".*");
    // Parse against the base NSID with any wildcard removed.
    let base = nsid.strip_suffix(".*").unwrap_or(nsid);

    let parts: Vec<&str> = base.split('.').collect();
    let (authority, rest) = match parts.as_slice() {
        [first, second, rest @ ..] => (format!("{}.{}", first, second), rest),
        _ => return Err(format!("Invalid NSID: {}", nsid)),
    };

    let mut name_segments = rest.join(".");
    if wildcard {
        // Re-attach the wildcard marker stripped above.
        name_segments.push_str(".*");
    }

    Ok((authority, name_segments))
}
174
174
+
175
175
+
/// Lists the immediate subdirectories of `base`, sorted by full path.
///
/// Returns an empty vector when `base` does not exist. Entries that fail
/// to read or whose paths are not valid UTF-8 are silently skipped.
fn discover_test_dirs(base: &str) -> Vec<String> {
    let root = Path::new(base);
    if !root.exists() {
        return Vec::new();
    }

    let mut found = Vec::new();
    for entry in fs::read_dir(root).unwrap() {
        if let Ok(entry) = entry {
            let path = entry.path();
            if path.is_dir() {
                if let Some(p) = path.to_str() {
                    found.push(p.to_string());
                }
            }
        }
    }

    found.sort();
    found
}
197
197
+
198
198
+
/// Prints a per-test pass/fail line for each entry, then a summary line
/// labeled with `test_type`.
///
/// Returns the number of passing tests and the `(name, error)` pairs for
/// the failing ones.
fn run_and_report_tests(
    tests: Vec<(String, Result<(), String>)>,
    test_type: &str,
) -> (usize, Vec<(String, String)>) {
    let mut ok_count = 0usize;
    let mut failures: Vec<(String, String)> = Vec::new();

    for (name, outcome) in tests {
        if let Err(msg) = outcome {
            println!("✗ {}: {}", name, msg);
            failures.push((name, msg));
        } else {
            println!("✓ {}", name);
            ok_count += 1;
        }
    }

    println!("\n{} Results: {} passed, {} failed", test_type, ok_count, failures.len());
    (ok_count, failures)
}