···11-use crate::config::{find_project_root, get_mlf_cache_dir, init_mlf_cache, ConfigError, MlfConfig};
22-use chrono::{DateTime, Utc};
11+use crate::config::{find_project_root, get_mlf_cache_dir, init_mlf_cache, ConfigError, MlfConfig, LockFile};
32use hickory_resolver::config::*;
43use hickory_resolver::Resolver;
54use miette::Diagnostic;
66-use serde::{Deserialize, Serialize};
55+use serde::Deserialize;
76use sha2::{Digest, Sha256};
88-use std::collections::HashMap;
99-use std::path::Path;
77+use std::collections::HashSet;
108use thiserror::Error;
1191210#[derive(Error, Debug, Diagnostic)]
···4341 #[diagnostic(code(mlf::fetch::io_error))]
4442 IoError(#[from] std::io::Error),
45434646- #[error("Failed to load cache: {0}")]
4747- #[diagnostic(code(mlf::fetch::cache_error))]
4848- CacheError(String),
4949-5044 #[error("Invalid NSID format: {0}")]
5145 #[diagnostic(code(mlf::fetch::invalid_nsid))]
5246 InvalidNsid(String),
5347}
54485555-#[derive(Debug, Serialize, Deserialize)]
5656-pub struct LexiconCache {
5757- #[serde(default)]
5858- pub lexicons: HashMap<String, CacheEntry>,
5959-}
6060-6161-#[derive(Debug, Serialize, Deserialize, Clone)]
6262-pub struct CacheEntry {
6363- pub nsid: String,
6464- pub fetched_at: DateTime<Utc>,
6565- pub did: String,
6666- #[serde(default)]
6767- pub hash: String,
6868-}
6969-7070-impl LexiconCache {
7171- pub fn load(path: &Path) -> Result<Self, FetchError> {
7272- if !path.exists() {
7373- return Ok(Self {
7474- lexicons: HashMap::new(),
7575- });
7676- }
7777-7878- let content = std::fs::read_to_string(path)?;
7979- toml::from_str(&content).map_err(|e| FetchError::CacheError(e.to_string()))
8080- }
8181-8282- pub fn save(&self, path: &Path) -> Result<(), FetchError> {
8383- let content =
8484- toml::to_string_pretty(self).map_err(|e| FetchError::CacheError(e.to_string()))?;
8585- std::fs::write(path, content)?;
8686- Ok(())
8787- }
8888-8989- pub fn add_entry(&mut self, nsid: String, did: String, hash: String) {
9090- self.lexicons.insert(
9191- nsid.clone(),
9292- CacheEntry {
9393- nsid,
9494- fetched_at: Utc::now(),
9595- did,
9696- hash,
9797- },
9898- );
9999- }
100100-}
1014910250#[derive(Debug, Deserialize)]
10351struct AtProtoRecord {
···10654}
1075510856/// Main entry point for fetch command
109109-pub fn run_fetch(nsid: Option<String>, save: bool) -> Result<(), FetchError> {
5757+pub fn run_fetch(nsid: Option<String>, save: bool, update: bool, locked: bool) -> Result<(), FetchError> {
5858+ // Validate flags
5959+ if update && locked {
6060+ return Err(FetchError::HttpError(
6161+ "Cannot use --update and --locked together".to_string()
6262+ ));
6363+ }
6464+11065 // Find project root
11166 let current_dir = std::env::current_dir()?;
11267 let project_root = ensure_project_root(¤t_dir)?;
···12580 }
12681 None => {
12782 // Fetch all dependencies from mlf.toml
128128- fetch_all_dependencies(&project_root)
8383+ fetch_all_dependencies(&project_root, update, locked)
12984 }
13085 }
13186}
···156111 }
157112}
158113159159-fn fetch_all_dependencies(project_root: &std::path::Path) -> Result<(), FetchError> {
114114+fn fetch_all_dependencies(project_root: &std::path::Path, update: bool, locked: bool) -> Result<(), FetchError> {
160115 // Load mlf.toml
161116 let config_path = project_root.join("mlf.toml");
162117 let config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?;
···166121 return Ok(());
167122 }
168123169169- println!("Fetching {} dependencies...", config.dependencies.dependencies.len());
124124+ let allow_transitive = config.dependencies.allow_transitive_deps;
125125+126126+ // Load or create lockfile
127127+ let lockfile_path = project_root.join("mlf-lock.toml");
128128+ let existing_lockfile = LockFile::load(&lockfile_path).map_err(FetchError::NoProjectRoot)?;
129129+ let has_existing_lockfile = lockfile_path.exists() && !existing_lockfile.lexicons.is_empty();
130130+131131+ // Handle --locked mode
132132+ if locked {
133133+ if !has_existing_lockfile {
134134+ return Err(FetchError::HttpError(
135135+ "No lockfile found. Run `mlf fetch` first to create mlf-lock.toml".to_string()
136136+ ));
137137+ }
138138+139139+ // In locked mode, we use the lockfile and verify nothing needs updating
140140+ // For now, we'll just use the lockfile - verification can be enhanced later
141141+ println!("Using locked dependencies from mlf-lock.toml");
142142+ return fetch_from_lockfile(project_root, &existing_lockfile);
143143+ }
144144+145145+ // Determine fetch mode
146146+ let mode = if update {
147147+ "update (ignoring lockfile)"
148148+ } else if has_existing_lockfile {
149149+ "lockfile"
150150+ } else {
151151+ "fresh"
152152+ };
153153+154154+ println!("Fetching {} dependencies... (mode: {}, transitive deps: {})",
155155+ config.dependencies.dependencies.len(),
156156+ mode,
157157+ if allow_transitive { "enabled" } else { "disabled" });
158158+159159+ // In update mode or if no lockfile, do full fetch
160160+ // In normal mode with lockfile, use lockfile for cached entries
161161+ let mut lockfile = if update || !has_existing_lockfile {
162162+ LockFile::new()
163163+ } else {
164164+ existing_lockfile
165165+ };
170166171167 let mut errors = Vec::new();
172168 let mut success_count = 0;
169169+ let mut fetched_nsids = HashSet::new();
173170171171+ // Fetch initial dependencies
174172 for dep in &config.dependencies.dependencies {
175173 println!("\nFetching: {}", dep);
176176- match fetch_lexicon(dep, project_root) {
174174+ match fetch_lexicon_with_lock(dep, project_root, &mut lockfile) {
177175 Ok(()) => {
178176 success_count += 1;
177177+ fetched_nsids.insert(dep.clone());
179178 }
180179 Err(e) => {
181180 errors.push((dep.clone(), format!("{}", e)));
···183182 }
184183 }
185184185185+ // If transitive dependencies are enabled, iteratively fetch missing deps
186186+ if allow_transitive {
187187+ let mut iteration = 0;
188188+ let max_iterations = 10; // Prevent infinite loops
189189+190190+ loop {
191191+ iteration += 1;
192192+ if iteration > max_iterations {
193193+ eprintln!("\nWarning: Reached maximum iteration limit for transitive dependencies");
194194+ break;
195195+ }
196196+197197+ // Collect unresolved references
198198+ let unresolved = match collect_unresolved_references(project_root) {
199199+ Ok(refs) => refs,
200200+ Err(e) => {
201201+ eprintln!("\nWarning: Failed to analyze dependencies: {}", e);
202202+ break;
203203+ }
204204+ };
205205+206206+ // Filter out NSIDs we've already fetched or tried to fetch
207207+ let new_deps: HashSet<String> = unresolved
208208+ .into_iter()
209209+ .filter(|nsid| !fetched_nsids.contains(nsid))
210210+ .collect();
211211+212212+ if new_deps.is_empty() {
213213+ break;
214214+ }
215215+216216+ // Determine whether to optimize transitive fetches
217217+ let should_optimize = config.dependencies.optimize_transitive_fetches;
218218+219219+ if should_optimize {
220220+ // Optimize the fetch patterns to reduce number of fetches
221221+ let optimized_patterns = optimize_fetch_patterns(&new_deps);
222222+223223+ println!("\n→ Found {} unresolved reference(s), fetching {} optimized pattern(s)...",
224224+ new_deps.len(), optimized_patterns.len());
225225+226226+ // Track which patterns are wildcards and their constituent NSIDs
227227+ let mut wildcard_failures: Vec<(String, Vec<String>)> = Vec::new();
228228+229229+ for pattern in optimized_patterns {
230230+ let is_wildcard = pattern.ends_with(".*");
231231+ println!("\nFetching transitive dependency: {}", pattern);
232232+ fetched_nsids.insert(pattern.clone());
233233+234234+ match fetch_lexicon_with_lock(&pattern, project_root, &mut lockfile) {
235235+ Ok(()) => {
236236+ success_count += 1;
237237+ }
238238+ Err(e) => {
239239+ eprintln!(" Warning: Failed to fetch {}: {}", pattern, e);
240240+241241+ // If this was a wildcard that failed, collect the individual NSIDs for retry
242242+ if is_wildcard {
243243+ let pattern_prefix = pattern.strip_suffix(".*").unwrap();
244244+ let matching_nsids: Vec<String> = new_deps.iter()
245245+ .filter(|nsid| nsid.starts_with(pattern_prefix))
246246+ .cloned()
247247+ .collect();
248248+249249+ if !matching_nsids.is_empty() {
250250+ wildcard_failures.push((pattern.clone(), matching_nsids));
251251+ }
252252+ }
253253+ }
254254+ }
255255+ }
256256+257257+ // Retry failed wildcards with individual NSIDs
258258+ if !wildcard_failures.is_empty() {
259259+ println!("\n→ Retrying failed wildcard patterns with individual NSIDs...");
260260+261261+ for (failed_pattern, nsids) in wildcard_failures {
262262+ println!(" Retrying {} NSIDs from failed pattern: {}", nsids.len(), failed_pattern);
263263+264264+ for nsid in nsids {
265265+ if !fetched_nsids.contains(&nsid) {
266266+ println!(" Fetching: {}", nsid);
267267+ fetched_nsids.insert(nsid.clone());
268268+269269+ match fetch_lexicon_with_lock(&nsid, project_root, &mut lockfile) {
270270+ Ok(()) => {
271271+ success_count += 1;
272272+ }
273273+ Err(e) => {
274274+ eprintln!(" Warning: Failed to fetch {}: {}", nsid, e);
275275+ }
276276+ }
277277+ }
278278+ }
279279+ }
280280+ }
281281+ } else {
282282+ // Fetch individually without optimization (safer, more predictable)
283283+ println!("\n→ Found {} unresolved reference(s), fetching individually...",
284284+ new_deps.len());
285285+286286+ for nsid in &new_deps {
287287+ println!("\nFetching transitive dependency: {}", nsid);
288288+ fetched_nsids.insert(nsid.clone());
289289+290290+ match fetch_lexicon_with_lock(nsid, project_root, &mut lockfile) {
291291+ Ok(()) => {
292292+ success_count += 1;
293293+ }
294294+ Err(e) => {
295295+ // Don't fail the entire fetch for transitive deps
296296+ eprintln!(" Warning: Failed to fetch {}: {}", nsid, e);
297297+ }
298298+ }
299299+ }
300300+ }
301301+ }
302302+ }
303303+304304+ // Save the lockfile
305305+ lockfile.save(&lockfile_path).map_err(FetchError::NoProjectRoot)?;
306306+ println!("\n→ Updated mlf-lock.toml");
307307+186308 if !errors.is_empty() {
187309 eprintln!(
188310 "\n{} dependency(ies) fetched successfully, {} error(s):",
···202324 Ok(())
203325}
/// Fetch dependencies using the lockfile (`--locked` mode).
///
/// Refetches every lexicon recorded in `mlf-lock.toml` from its recorded DID
/// and verifies each checksum via `fetch_specific_lexicon`. All entries are
/// attempted even if some fail; errors are reported in bulk at the end.
fn fetch_from_lockfile(project_root: &std::path::Path, lockfile: &LockFile) -> Result<(), FetchError> {
    if lockfile.lexicons.is_empty() {
        println!("Lockfile is empty");
        return Ok(());
    }

    println!("Fetching {} lexicon(s) from lockfile...", lockfile.lexicons.len());

    let mut errors = Vec::new();
    let mut success_count = 0;

    // Fetch each lexicon from its DID; collect failures instead of
    // bailing out so the user sees every broken entry in one run.
    for (nsid, locked) in &lockfile.lexicons {
        println!("\nRefetching: {}", nsid);

        // Fetch the lexicon using the DID and checksum recorded in the lockfile
        // (no DNS/NSID resolution happens on this path).
        match fetch_specific_lexicon(nsid, &locked.did, &locked.checksum, project_root) {
            Ok(()) => {
                success_count += 1;
            }
            Err(e) => {
                errors.push((nsid.clone(), format!("{}", e)));
            }
        }
    }

    if !errors.is_empty() {
        eprintln!(
            "\n{} lexicon(s) fetched successfully, {} error(s):",
            success_count,
            errors.len()
        );
        for (nsid, error) in &errors {
            eprintln!(" {} - {}", nsid, error);
        }
        // NOTE(review): HttpError doubles as a generic failure variant here;
        // a dedicated lockfile/fetch-failure variant may read better.
        return Err(FetchError::HttpError(format!(
            "Failed to fetch {} lexicons",
            errors.len()
        )));
    }

    println!("\n✓ Successfully fetched all {} lexicons", success_count);
    Ok(())
}
/// Fetch a specific lexicon by NSID from a known DID, verifying checksum.
///
/// Backs `--locked` mode: `did` and `expected_checksum` come straight from
/// `mlf-lock.toml`, so no handle/DID resolution is performed. On success the
/// lexicon JSON is written under `.mlf/lexicons/json/...` and its MLF
/// conversion under `.mlf/lexicons/mlf/...`.
///
/// Errors when the NSID is absent from the repo's records, the computed
/// checksum differs from the locked one, or any save/convert step fails.
fn fetch_specific_lexicon(
    nsid: &str,
    did: &str,
    expected_checksum: &str,
    project_root: &std::path::Path,
) -> Result<(), FetchError> {
    // Initialize .mlf directory (idempotent)
    init_mlf_cache(project_root).map_err(FetchError::InitFailed)?;
    let mlf_dir = get_mlf_cache_dir(project_root);

    // Fetch all lexicon records from the DID's repo
    let records = fetch_lexicon_records(did)?;

    // Scan for the record whose NSID matches
    for record in records {
        let record_nsid = extract_nsid_from_record(&record)?;

        if record_nsid == nsid {
            // Found it! Hash the pretty-printed JSON — this must match how the
            // checksum was computed when the lockfile entry was written.
            let json_str = serde_json::to_string_pretty(&record.value)?;
            let hash = calculate_hash(&json_str);

            if hash != expected_checksum {
                return Err(FetchError::HttpError(format!(
                    "Checksum mismatch for {}: expected {}, got {}",
                    nsid, expected_checksum, hash
                )));
            }

            // Save JSON: NSID segments become directory components,
            // e.g. "app.bsky.actor.profile" -> json/app/bsky/actor/profile.json
            let mut json_path = mlf_dir.join("lexicons/json");
            for segment in nsid.split('.') {
                json_path.push(segment);
            }
            json_path.set_extension("json");

            if let Some(parent) = json_path.parent() {
                std::fs::create_dir_all(parent)?;
            }
            std::fs::write(&json_path, &json_str)?;
            println!(" → Saved JSON (checksum verified)");

            // Convert to MLF and mirror the same path layout under lexicons/mlf
            let mlf_content = crate::generate::mlf::generate_mlf_from_json(&record.value)
                .map_err(|e| FetchError::ConversionError(format!("{:?}", e)))?;

            let mut mlf_path = mlf_dir.join("lexicons/mlf");
            for segment in nsid.split('.') {
                mlf_path.push(segment);
            }
            mlf_path.set_extension("mlf");

            if let Some(parent) = mlf_path.parent() {
                std::fs::create_dir_all(parent)?;
            }
            std::fs::write(&mlf_path, mlf_content)?;
            println!(" → Converted to MLF");

            return Ok(());
        }
    }

    // Exhausted all records without a match
    Err(FetchError::HttpError(format!(
        "Lexicon {} not found in repo {}",
        nsid, did
    )))
}
442442+205443fn save_dependency(project_root: &std::path::Path, nsid: &str) -> Result<(), FetchError> {
206444 let config_path = project_root.join("mlf.toml");
207445 let mut config = MlfConfig::load(&config_path).map_err(FetchError::NoProjectRoot)?;
···219457}
/// Fetch a single lexicon (or wildcard pattern) by NSID.
///
/// Public convenience wrapper around `fetch_lexicon_with_lock`: entries are
/// recorded into a throwaway `LockFile` that is local to this call and
/// dropped on return — nothing is persisted to `mlf-lock.toml` here.
pub fn fetch_lexicon(nsid: &str, project_root: &std::path::Path) -> Result<(), FetchError> {
    let mut lockfile = LockFile::new();
    fetch_lexicon_with_lock(nsid, project_root, &mut lockfile)
}
463463+464464+fn fetch_lexicon_with_lock(nsid: &str, project_root: &std::path::Path, lockfile: &mut LockFile) -> Result<(), FetchError> {
222465 // Initialize .mlf directory
223466 init_mlf_cache(project_root).map_err(FetchError::InitFailed)?;
224467225468 let mlf_dir = get_mlf_cache_dir(&project_root);
226226- let cache_file = mlf_dir.join(".lexicon-cache.toml");
227227- let mut cache = LexiconCache::load(&cache_file)?;
228469229470 // Validate NSID format: must be specific (3+ segments) or use wildcard
230471 validate_nsid_format(nsid)?;
···236477 } else {
237478 nsid
238479 };
239239-240240- // Check if already cached (for specific NSIDs only)
241241- if !is_wildcard && cache.lexicons.contains_key(nsid) {
242242- println!("Lexicon '{}' is already cached. Skipping fetch.", nsid);
243243- println!(" (Use --force to re-fetch)");
244244- return Ok(());
245245- }
246480247481 // Extract authority and name segments from NSID
248482 // For "app.bsky.actor.profile", authority is "app.bsky", name is "actor.profile"
···339573 // Calculate hash of JSON content
340574 let hash = calculate_hash(&json_str);
341575342342- // Update cache
343343- cache.add_entry(record_nsid.clone(), did.clone(), hash);
576576+ // Extract dependencies from JSON
577577+ let dependencies = extract_dependencies_from_json(&record.value);
578578+579579+ // Update lockfile
580580+ lockfile.add_lexicon(record_nsid.clone(), did.clone(), hash.clone(), dependencies);
344581 }
345345-346346- // Save cache
347347- cache.save(&cache_file)?;
348582349583 if processed_count == 0 {
350584 return Err(FetchError::HttpError(format!(
···587821fn calculate_hash(content: &str) -> String {
588822 let mut hasher = Sha256::new();
589823 hasher.update(content.as_bytes());
590590- format!("{:x}", hasher.finalize())
824824+ format!("sha256:{:x}", hasher.finalize())
825825+}
826826+827827+/// Extract external references from a lexicon JSON
828828+/// Returns a list of NSIDs that this lexicon depends on
829829+fn extract_dependencies_from_json(json: &serde_json::Value) -> Vec<String> {
830830+ let mut deps = HashSet::new();
831831+832832+ fn visit_value(value: &serde_json::Value, deps: &mut HashSet<String>) {
833833+ match value {
834834+ serde_json::Value::Object(map) => {
835835+ // Check if this is a ref object
836836+ if let Some(ref_val) = map.get("ref") {
837837+ if let Some(ref_str) = ref_val.as_str() {
838838+ // External refs are multi-segment NSIDs
839839+ if ref_str.contains('.') {
840840+ deps.insert(ref_str.to_string());
841841+ }
842842+ }
843843+ }
844844+845845+ // Recurse into all values
846846+ for val in map.values() {
847847+ visit_value(val, deps);
848848+ }
849849+ }
850850+ serde_json::Value::Array(arr) => {
851851+ for val in arr {
852852+ visit_value(val, deps);
853853+ }
854854+ }
855855+ _ => {}
856856+ }
857857+ }
858858+859859+ visit_value(json, &mut deps);
860860+ let mut result: Vec<String> = deps.into_iter().collect();
861861+ result.sort();
862862+ result
863863+}
/// Extract external references from fetched MLF files that need to be resolved.
///
/// Parses every `.mlf` file under `.mlf/lexicons/mlf/` into a workspace, runs
/// resolution, and converts each undefined multi-segment reference into a
/// namespace fetch pattern (e.g. `app.bsky.actor.*`) via
/// `extract_namespace_pattern`. Returns the set of patterns still needed.
fn collect_unresolved_references(project_root: &std::path::Path) -> Result<HashSet<String>, FetchError> {
    use mlf_lang::{parser, workspace::Workspace};

    let mlf_dir = get_mlf_cache_dir(project_root);
    let mlf_lexicons_dir = mlf_dir.join("lexicons/mlf");

    // Nothing fetched yet — no references to resolve.
    if !mlf_lexicons_dir.exists() {
        return Ok(HashSet::new());
    }

    // Build a workspace from all fetched MLF files
    let mut workspace = Workspace::new();
    let mut unresolved = HashSet::new();

    // Recursively collect all .mlf files under `dir` into `files`.
    fn collect_mlf_files(dir: &std::path::Path, files: &mut Vec<std::path::PathBuf>) -> std::io::Result<()> {
        if dir.is_dir() {
            for entry in std::fs::read_dir(dir)? {
                let entry = entry?;
                let path = entry.path();
                if path.is_dir() {
                    collect_mlf_files(&path, files)?;
                } else if path.extension().and_then(|s| s.to_str()) == Some("mlf") {
                    files.push(path);
                }
            }
        }
        Ok(())
    }

    let mut mlf_files = Vec::new();
    collect_mlf_files(&mlf_lexicons_dir, &mut mlf_files)?;

    // Parse each MLF file and add it to the workspace under a namespace
    // derived from its path.
    for mlf_file in mlf_files {
        let content = std::fs::read_to_string(&mlf_file)?;

        // Extract namespace from file path
        // e.g., ".mlf/lexicons/mlf/place/stream/key.mlf" -> "place.stream.key"
        let relative_path = mlf_file.strip_prefix(&mlf_lexicons_dir)
            .map_err(|_| FetchError::IoError(std::io::Error::new(
                std::io::ErrorKind::Other,
                "Failed to compute relative path"
            )))?;

        let namespace = relative_path
            .with_extension("")
            .to_string_lossy()
            .replace(std::path::MAIN_SEPARATOR, ".");

        // Parse the lexicon. Files that fail to parse are skipped silently;
        // any references they contain simply won't be discovered this pass.
        if let Ok(lexicon) = parser::parse_lexicon(&content) {
            let _ = workspace.add_module(namespace, lexicon);
        }
    }

    // Resolve to find undefined references
    if let Err(errors) = workspace.resolve() {
        for error in errors.errors {
            if let mlf_lang::error::ValidationError::UndefinedReference { name, .. } = error {
                // Only collect multi-segment NSIDs (external references);
                // single-segment names are likely local typos.
                if name.contains('.') {
                    // Convert type reference to namespace pattern,
                    // e.g., "app.bsky.actor.defs.profileViewBasic" -> "app.bsky.actor.*"
                    // We fetch the whole namespace since we don't know which specific
                    // lexicon file contains the type definition.
                    let namespace_pattern = extract_namespace_pattern(&name);
                    unresolved.insert(namespace_pattern);
                }
            }
        }
    }

    Ok(unresolved)
}
/// Derive the namespace fetch pattern for a type reference.
///
/// `"app.bsky.actor.defs.profileViewBasic"` -> `"app.bsky.actor.*"`
/// `"com.atproto.repo.strongRef"`           -> `"com.atproto.repo.*"`
/// `"place.stream"`                         -> `"place.stream.*"`
///
/// The first three segments are taken as the namespace for 3+-segment refs,
/// matching the common ATProto layout where `defs` live in sibling lexicons.
fn extract_namespace_pattern(type_ref: &str) -> String {
    let segments: Vec<&str> = type_ref.split('.').collect();

    match segments.len() {
        // 3+ segments: wildcard everything past the three-segment namespace.
        n if n >= 3 => format!("{}.*", segments[..3].join(".")),
        // Exactly two segments name an authority; fetch everything under it.
        2 => format!("{}.*", type_ref),
        // Single segment or empty — pass through unchanged (shouldn't happen).
        _ => type_ref.to_string(),
    }
}
/// Collapse a set of NSIDs into a minimal set of fetch patterns.
///
/// `["app.bsky.actor.foo", "app.bsky.actor.bar"]` -> `["app.bsky.actor.*"]`
///
/// Three passes: (1) wildcard any namespace prefix shared by 2+ NSIDs,
/// (2) wildcard any authority still covering 3+ uncollapsed NSIDs,
/// (3) emit the rest individually. The result is sorted for determinism.
fn optimize_fetch_patterns(nsids: &HashSet<String>) -> Vec<String> {
    use std::collections::BTreeMap;

    if nsids.is_empty() {
        return Vec::new();
    }

    // Bucket every NSID by its authority (first two segments) and by its
    // namespace prefix (all segments but the last). BTreeMap keeps the
    // grouping order deterministic.
    let mut by_authority: BTreeMap<String, Vec<String>> = BTreeMap::new();
    let mut by_prefix: BTreeMap<String, Vec<String>> = BTreeMap::new();

    for nsid in nsids {
        let segments: Vec<&str> = nsid.split('.').collect();
        if segments.len() >= 2 {
            by_authority
                .entry(segments[..2].join("."))
                .or_default()
                .push(nsid.clone());
        }
        if segments.len() >= 3 {
            by_prefix
                .entry(segments[..segments.len() - 1].join("."))
                .or_default()
                .push(nsid.clone());
        }
    }

    let mut patterns = Vec::new();
    let mut covered: HashSet<String> = HashSet::new();

    // Pass 1: namespace-level wildcards (the more specific grouping).
    for (prefix, members) in &by_prefix {
        if members.len() >= 2 && !covered.contains(&members[0]) {
            patterns.push(format!("{}.*", prefix));
            covered.extend(members.iter().cloned());
        }
    }

    // Pass 2: authority-level wildcards — only worthwhile when 3+ NSIDs
    // under the same authority are still uncovered.
    for (authority, members) in &by_authority {
        let remaining: Vec<&String> = members
            .iter()
            .filter(|nsid| !covered.contains(*nsid))
            .collect();

        if remaining.len() >= 3 {
            patterns.push(format!("{}.*", authority));
            covered.extend(remaining.into_iter().cloned());
        }
    }

    // Pass 3: anything still uncovered is fetched individually.
    patterns.extend(nsids.iter().filter(|nsid| !covered.contains(*nsid)).cloned());

    // Sort for consistent output across runs.
    patterns.sort();
    patterns
}
+8-2
mlf-cli/src/main.rs
···63636464 #[arg(long, help = "Add namespace to dependencies in mlf.toml")]
6565 save: bool,
6666+6767+ #[arg(long, help = "Update dependencies to latest versions (ignores lockfile)")]
6868+ update: bool,
6969+7070+ #[arg(long, help = "Require lockfile and fail if dependencies need updating")]
7171+ locked: bool,
6672 },
6773}
6874···134140 generate::run_all().into_diagnostic()
135141 }
136142 },
137137- Commands::Fetch { nsid, save } => {
138138- fetch::run_fetch(nsid, save).into_diagnostic()
143143+ Commands::Fetch { nsid, save, update, locked } => {
144144+ fetch::run_fetch(nsid, save, update, locked).into_diagnostic()
139145 }
140146 };
141147
+33-5
website/content/docs/cli/02-configuration.md
···9191 "app.bsky",
9292 "com.atproto"
9393]
9494+9595+# Enable/disable transitive dependency resolution (default: true)
9696+allow_transitive_deps = true
9797+9898+# Enable/disable fetch optimization (default: false)
9999+# When true, tries to collapse similar NSIDs into wildcards
100100+optimize_transitive_fetches = false
94101```
951029696-These dependencies are fetched when you run `mlf fetch` without arguments.
103103+**Options:**
104104+105105+- `dependencies` - List of NSID patterns to fetch (supports wildcards like `app.bsky.*`)
106106+- `allow_transitive_deps` - Automatically fetch dependencies of dependencies (default: `true`)
107107+- `optimize_transitive_fetches` - Group similar NSIDs into wildcards to reduce fetch count (default: `false`)
108108+109109+These dependencies are fetched when you run `mlf fetch` without arguments. MLF automatically resolves transitive dependencies unless `allow_transitive_deps` is set to `false`. See the [Fetch Command](../07-fetch/#transitive-dependencies) for more details.
9711098111## Commands Using Configuration
99112···257270```
258271.mlf/
259272├── .gitignore # Automatically created
260260-├── .lexicon-cache.toml # Metadata about fetched lexicons
261273└── lexicons/
262274 ├── json/ # Original JSON lexicons
263275 │ ├── app.bsky.actor.profile.json
···269281270282The `.mlf` directory is automatically added to `.gitignore`, so fetched lexicons won't be committed to your repository.
271283284284+## Lockfile
285285+286286+MLF tracks resolved lexicons in `mlf-lock.toml` at your project root:
287287+288288+```toml
289289+version = 1
290290+291291+[lexicons."app.bsky.actor.profile"]
292292+nsid = "app.bsky.actor.profile"
293293+did = "did:plc:4v4y5r3lwsbtmsxhile2ljac"
294294+checksum = "sha256:abc123..."
295295+dependencies = ["com.atproto.repo.strongRef"]
296296+```
297297+298298+**Always commit `mlf-lock.toml`** to version control to ensure reproducible builds. See the [Fetch Command](../07-fetch/#lockfile-mlf-lock-toml) documentation for details.
299299+272300## Best Practices
273301274274-1. **Commit `mlf.toml`** - Version control your configuration
275275-2. **Don't commit `.mlf/`** - Let each developer fetch dependencies
302302+1. **Commit `mlf.toml` and `mlf-lock.toml`** - Version control your configuration and lockfile
303303+2. **Don't commit `.mlf/`** - Let each developer fetch dependencies independently
2763043. **Use semantic namespaces** - Organize lexicons by domain
2773054. **Set consistent root** - Keep your source directory as the root for namespace calculation
2783065. **Multiple outputs** - Generate both lexicons and code simultaneously
279279-6. **CI/CD integration** - Run `mlf check` in your CI pipeline
307307+6. **CI/CD integration** - Run `mlf check` and `mlf fetch --locked` in your CI pipeline
280308281309## Override Configuration
282310
+5-3
website/content/docs/cli/03-init.md
···3636 ```
3737 .mlf/
3838 ├── .gitignore # Ignores all files except itself
3939- ├── .lexicon-cache.toml # Metadata about fetched lexicons
4039 └── lexicons/
4140 ├── json/ # Original JSON lexicons
4241 └── mlf/ # Converted MLF format
4342 ```
44434544The `.mlf` directory is automatically added to `.gitignore` so fetched lexicons aren't committed to version control.
4545+4646+When you fetch dependencies, an **mlf-lock.toml** lockfile is created at the project root to track resolved lexicon versions. This lockfile should be committed to version control.
46474748## Interactive Mode
4849···1971981981991. **Always start with init** - It sets up the correct structure
1992002. **Use --yes in scripts** - Avoids hanging on prompts
200200-3. **Commit mlf.toml** - Track your project configuration
201201-4. **Don't commit .mlf/** - Let each developer fetch dependencies
201201+3. **Commit mlf.toml and mlf-lock.toml** - Track your project configuration and lockfile
202202+4. **Don't commit .mlf/** - Let each developer fetch dependencies independently
2022035. **Customize after init** - Edit `mlf.toml` to add outputs and dependencies
204204+6. **Use --locked in CI** - Run `mlf fetch --locked` for reproducible builds
+152-49
website/content/docs/cli/07-fetch.md
···99## Usage
10101111```bash
1212-# Fetch all dependencies from mlf.toml
1212+# Fetch all dependencies from mlf.toml (use lockfile if present)
1313mlf fetch
1414+1515+# Fetch and update all dependencies to latest versions
1616+mlf fetch --update
1717+1818+# Strict mode: fetch from lockfile only (for CI/CD)
1919+mlf fetch --locked
14201521# Fetch a specific lexicon
1622mlf fetch <NSID>
···30363137**Options:**
3238- `--save` - Add the NSID/pattern to dependencies in `mlf.toml`
3939+- `--update` - Update dependencies to latest versions (ignores lockfile)
4040+- `--locked` - Require lockfile and fail if dependencies need updating (for CI/CD)
4141+4242+## Lockfile (`mlf-lock.toml`)
4343+4444+MLF uses a lockfile to ensure reproducible builds, similar to `package-lock.json` (npm) or `Cargo.lock` (Rust).
4545+4646+### Lockfile Format
4747+4848+```toml
4949+version = 1
5050+5151+[lexicons."place.stream.richtext.facet"]
5252+nsid = "place.stream.richtext.facet"
5353+did = "did:web:stream.place"
5454+checksum = "sha256:72c8986132821c7c6e3bd30d697f017861d77867b358e3c7850c19baef0a50d5"
5555+dependencies = ["app.bsky.richtext.facet#byteSlice"]
5656+5757+[lexicons."app.bsky.richtext.facet"]
5858+nsid = "app.bsky.richtext.facet"
5959+did = "did:plc:4v4y5r3lwsbtmsxhile2ljac"
6060+checksum = "sha256:db59d218c482774e617bb5d90d19ab75e2557f8cdebafe798be01b37d957d336"
6161+```
6262+6363+### Fetch Modes
6464+6565+| Mode | Command | Behavior |
6666+|------|---------|----------|
6767+| **Fresh** | `mlf fetch` | No lockfile exists, performs full DNS lookup and fetch, creates lockfile |
6868+| **Lockfile** | `mlf fetch` | Uses existing lockfile to guide fetch, updates lockfile if dependencies change |
6969+| **Update** | `mlf fetch --update` | Ignores lockfile, refetches everything, updates lockfile with latest versions |
7070+| **Locked** | `mlf fetch --locked` | Strict CI mode, uses only lockfile, verifies checksums, fails if no lockfile exists |
7171+7272+### When to Use Each Mode
7373+7474+- **Development**: Use `mlf fetch` (default) - respects lockfile for consistency
7575+- **Update deps**: Use `mlf fetch --update` - gets latest versions
7676+- **CI/Production**: Use `mlf fetch --locked` - ensures reproducible builds
7777+7878+## Transitive Dependencies
7979+8080+MLF automatically resolves and fetches transitive dependencies (dependencies of dependencies).
8181+8282+### Example
8383+8484+If `place.stream.richtext.facet` depends on `app.bsky.richtext.facet#byteSlice`, MLF will:
8585+1. Fetch `place.stream.richtext.*` (your explicit dependency)
8686+2. Parse the lexicons to find external references
8787+3. Automatically fetch `app.bsky.richtext.*` (transitive dependency)
8888+4. Record both in `mlf-lock.toml`
8989+9090+### Configuration
9191+9292+Control transitive dependency resolution in `mlf.toml`:
9393+9494+```toml
9595+[dependencies]
9696+dependencies = ["place.stream.*"]
9797+9898+# Enable/disable transitive dependency resolution (default: true)
9999+allow_transitive_deps = true
100100+101101+# Enable/disable fetch optimization (default: false)
102102+# When true, tries to collapse similar NSIDs into wildcards
103103+optimize_transitive_fetches = false
104104+```
3310534106## How It Works
35107···391112. **DID Resolution** - Resolves the DID to a PDS endpoint
401123. **Fetch Records** - Queries `com.atproto.repo.listRecords` for lexicon schemas
411134. **Save & Convert** - Saves JSON and converts to MLF format
114114+5. **Update Lockfile** - Records NSIDs, DIDs, checksums, and dependencies
4211543116## Examples
44117···5913260133**Output:**
61134```
6262-Fetching 2 dependencies...
135135+Fetching 2 dependencies... (mode: fresh, transitive deps: enabled)
6313664137Fetching: com.example.forum.*
65138Fetching lexicons for pattern: com.example.forum.*
···69142 Processing: com.example.forum.post
70143 → Saved JSON to .mlf/lexicons/json/com/example/forum/post.json
71144 → Converted to MLF at .mlf/lexicons/mlf/com/example/forum/post.mlf
7272- Processing: com.example.forum.thread
7373- → Saved JSON to .mlf/lexicons/json/com/example/forum/thread.json
7474- → Converted to MLF at .mlf/lexicons/mlf/com/example/forum/thread.mlf
75145✓ Successfully fetched 2 lexicon(s) for com.example.forum.*
761467777-Fetching: com.example.social.*
7878-...
147147+→ Updated mlf-lock.toml
7914880149✓ Successfully fetched all 2 dependencies
81150```
82151152152+### Update to Latest Versions
153153+154154+```bash
155155+mlf fetch --update
156156+```
157157+158158+This ignores the lockfile and fetches the latest versions of all dependencies.
159159+160160+### CI/CD with Locked Mode
161161+162162+```bash
163163+mlf fetch --locked
164164+```
165165+166166+**Output:**
167167+```
168168+Using locked dependencies from mlf-lock.toml
169169+Fetching 2 lexicon(s) from lockfile...
170170+171171+Refetching: place.stream.richtext.facet
172172+ → Using PDS: https://stream.place
173173+ → Saved JSON (checksum verified)
174174+ → Converted to MLF
176176+✓ Successfully fetched all 2 lexicon(s)
177177+```
178178+179179+If no lockfile exists:
180180+```
181181+✗ No lockfile found. Run `mlf fetch` first to create mlf-lock.toml
182182+```
183183+83184### Fetch Specific Lexicon
8418585186```bash
···114215```
115216.mlf/
116217├── .gitignore # Auto-generated
117117-├── .lexicon-cache.toml # Cache metadata
118218└── lexicons/
119219 ├── json/ # Original JSON lexicons
120220 │ ├── com/
···142242 └── post.mlf
143243```
144244145145-### Cache File
146146-147147-The `.lexicon-cache.toml` tracks what's been fetched:
148148-149149-```toml
150150-[[lexicons."com.example.forum.post"]]
151151-nsid = "com.example.forum.post"
152152-fetched_at = "2024-01-15T10:30:00Z"
153153-did = "did:web:example.com"
154154-hash = "abc123..."
155155-```
245245+**Note:** The lockfile (`mlf-lock.toml`) lives at the project root, sibling to `mlf.toml`.
156246157247## DNS Resolution
158248···222312...
223313```
224314225225-## Re-fetching
226226-227227-If a lexicon is already cached, fetch skips it:
228228-229229-```bash
230230-$ mlf fetch com.example.forum.post
231231-Lexicon 'com.example.forum.post' is already cached. Skipping fetch.
232232- (Use --force to re-fetch)
233233-```
234234-235235-To re-fetch:
236236-237237-```bash
238238-mlf fetch com.example.forum.post --force # Not yet implemented
239239-```
240240-241315## Error Handling
242316243317### DNS Errors
···276350### Invalid NSID Format
277351278352```
279279-✗ NSID must have at least 3 segments or use wildcard (e.g., 'com.example.forum.post' or 'com.example.forum.*'): com.example
353353+✗ NSID must have at least 2 segments or use wildcard: com
280354```
281355282356**Solution:**
283357- Use a specific NSID: `com.example.forum.post`
284358- Or use a wildcard: `com.example.forum.*`
285359286286-## Best Practices
360360+### Checksum Mismatch (--locked mode)
287361288288-1. **Fetch before work** - Always fetch dependencies before coding
289289-2. **Use --save** - Keep `mlf.toml` up to date with dependencies
290290-3. **Don't commit `.mlf/`** - Let each developer fetch independently
291291-4. **Check DNS** - Verify TXT records before fetching
292292-5. **Version dependencies** - Consider tracking lexicon versions (future feature)
362362+```
363363+✗ Checksum mismatch for place.stream.richtext.facet: expected sha256:abc123, got sha256:def456
364364+```
365365+366366+**Causes:**
367367+- Lexicon was updated on the server
368368+- Lock file is out of date
369369+370370+**Solution:**
371371+```bash
372372+mlf fetch --update # Update lockfile with new checksums
373373+```
374374+375375+## Best Practices
293376377377+1. **Commit lockfile** - Always commit `mlf-lock.toml` to version control
378378+2. **Use --locked in CI** - Ensures reproducible builds in CI/CD pipelines
379379+3. **Fetch before work** - Always fetch dependencies before coding
380380+4. **Use --save** - Keep `mlf.toml` up to date with dependencies
381381+5. **Don't commit `.mlf/`** - Let each developer fetch independently
382382+6. **Check DNS** - Verify TXT records before fetching
383383+7. **Update explicitly** - Use `mlf fetch --update` when you want latest versions
294384295385## Comparison with npm/cargo
296386297297-The fetch command is similar to package managers:
387387+The fetch command follows patterns from popular package managers:
298388299299-| Command | npm | cargo | mlf |
300300-|---------|-----|-------|-----|
301301-| Install deps | `npm install` | `cargo fetch` | `mlf fetch` |
302302-| Add dep | `npm install pkg --save` | `cargo add pkg` | `mlf fetch ns --save` |
389389+| Aspect | npm | Cargo | MLF |
390390+|--------|-----|-------|-----|
391391+| Install deps | `npm install` | `cargo fetch` | `mlf fetch` |
392392+| Update deps | `npm update` | `cargo update` | `mlf fetch --update` |
393393+| Strict mode | `npm ci` | `cargo build --locked` | `mlf fetch --locked` |
394394+| Add dep | `npm install pkg` | `cargo add pkg` | `mlf fetch ns --save` |
303395| Config file | `package.json` | `Cargo.toml` | `mlf.toml` |
396396+| Lock file | `package-lock.json` | `Cargo.lock` | `mlf-lock.toml` |
304397| Cache | `node_modules/` | `~/.cargo/` | `.mlf/` |
305398306399## Troubleshooting
···321414- ✓ `com.example.forum.post` (specific lexicon)
322415- ✓ `com.example.forum.*` (wildcard)
323416- ✓ `app.bsky.feed.*` (real-world wildcard)
324324-- ✗ `com.example` (must be specific or use wildcard)
417417+- ✗ `com` (must have at least 2 segments)
325418326419### Permission Errors
327420328328-Ensure you have write permissions for the project directory to create `.mlf/`.
421421+Ensure you have write permissions for the project directory to create `.mlf/` and `mlf-lock.toml`.
422422+423423+### Conflicting Flags
424424+425425+```
426426+✗ Cannot use --update and --locked together
427427+```
428428+429429+Choose one mode:
430430+- Use `--update` to get latest versions
431431+- Use `--locked` for strict reproducible builds