···4343 .load_car(reader)
4444 .await?
4545 {
4646- Driver::Memory(_, _) => panic!("try this on a bigger car"),
4747- Driver::Disk(big_stuff) => {
4646+ None => panic!("empty mst! try a bigger car"),
4747+ Some(Driver::Memory(_, _)) => panic!("try this on a bigger car"),
4848+ Some(Driver::Disk(big_stuff)) => {
4849 // we reach here if the repo was too big and needs to be spilled to
4950 // disk to continue
5051···6162 // pop the driver back out to get some code indentation relief
6263 driver
6364 }
6565+ };
6666+6767+ let Some(driver) = driver else {
6868+ panic!("big car but somehow empty MST: is the archive stuffed with garbage?");
6469 };
65706671 // collect some random stats about the blocks
+56-32
src/drive.rs
···11//! Consume a CAR from an AsyncRead, producing an ordered stream of records
2233+use crate::walk::Output;
34use crate::Bytes;
45use crate::HashMap;
56use crate::disk::{DiskError, DiskStore};
66-use crate::mst::Node;
77+use crate::mst::{Node, MstNode};
78use cid::Cid;
89use iroh_car::CarReader;
910use std::convert::Infallible;
1011use tokio::{io::AsyncRead, sync::mpsc};
11121213use crate::mst::Commit;
1313-use crate::walk::{Step, WalkError, Walker};
1414+use crate::walk::{WalkError, Walker};
14151516/// Errors that can happen while consuming and emitting blocks and records
1617#[derive(Debug, thiserror::Error)]
···157158 }
158159 }
159160 /// Begin processing an atproto MST from a CAR file
160160- pub async fn load_car<R: AsyncRead + Unpin>(&self, reader: R) -> Result<Driver<R>, DriveError> {
161161+ pub async fn load_car<R: AsyncRead + Unpin>(
162162+ &self,
163163+ reader: R,
164164+ ) -> Result<Option<Driver<R>>, DriveError> {
161165 Driver::load_car(reader, noop, self.mem_limit_mb).await
162166 }
163167}
···180184 self
181185 }
182186 /// Begin processing an atproto MST from a CAR file
183183- pub async fn load_car<R: AsyncRead + Unpin>(&self, reader: R) -> Result<Driver<R>, DriveError> {
187187+ pub async fn load_car<R: AsyncRead + Unpin>(
188188+ &self,
189189+ reader: R,
190190+ ) -> Result<Option<Driver<R>>, DriveError> {
184191 Driver::load_car(reader, self.block_processor, self.mem_limit_mb).await
185192 }
186193}
···199206 reader: R,
200207 process: fn(Bytes) -> Bytes,
201208 mem_limit_mb: usize,
202202- ) -> Result<Driver<R>, DriveError> {
209209+ ) -> Result<Option<Driver<R>>, DriveError> {
203210 let max_size = mem_limit_mb * 2_usize.pow(20);
204211 let mut mem_blocks = HashMap::new();
205212···225232 continue;
226233 }
227234228228- let data = Bytes::from(data);
229229-230235 // remaining possible types: node, record, other. optimistically process
231236 let maybe_processed = MaybeProcessedBlock::maybe(process, data);
232237···234239 mem_size += maybe_processed.len();
235240 mem_blocks.insert(cid, maybe_processed);
236241 if mem_size >= max_size {
237237- return Ok(Driver::Disk(NeedDisk {
242242+ return Ok(Some(Driver::Disk(NeedDisk {
238243 car,
239244 root,
240245 process,
241246 max_size,
242247 mem_blocks,
243248 commit,
244244- }));
249249+ })));
245250 }
246251 }
247252248253 // all blocks loaded and we fit in memory! hopefully we found the commit...
249254 let commit = commit.ok_or(DriveError::MissingCommit)?;
250255251251- let walker = Walker::new(commit.data);
256256+ // the commit always must point to a Node; empty node => empty MST special case
257257+ let node: MstNode = match mem_blocks.get(&commit.data).ok_or(DriveError::MissingCommit)? {
258258+ MaybeProcessedBlock::Processed(_) => Err(WalkError::BadCommitFingerprint)?,
259259+ MaybeProcessedBlock::Raw(bytes) => serde_ipld_dagcbor::from_slice(bytes)?,
260260+ };
261261+ if node.is_empty() {
262262+ // TODO: actually we still want the commit in this case
263263+ return Ok(None);
264264+ }
265265+ let depth = node.depth.unwrap();
266266+267267+ let walker = Walker::new(commit.data, depth);
252268253253- Ok(Driver::Memory(
269269+ Ok(Some(Driver::Memory(
254270 commit,
255271 MemDriver {
256272 blocks: mem_blocks,
257273 walker,
258274 process,
259275 },
260260- ))
276276+ )))
261277 }
262278}
263279···287303 let mut out = Vec::with_capacity(n);
288304 for _ in 0..n {
289305 // walk as far as we can until we run out of blocks or find a record
290290- match self.walker.step(&mut self.blocks, self.process)? {
291291- Step::Finish => break,
292292- Step::Found { rkey, data } => {
293293- out.push((rkey, data));
294294- continue;
295295- }
306306+ let Some(Output { rkey, cid: _, data }) = self.walker.step(&mut self.blocks, self.process)? else {
307307+ break;
296308 };
309309+ out.push((rkey, data));
297310 }
298298-299311 if out.is_empty() {
300312 Ok(None)
301313 } else {
···318330 pub async fn finish_loading(
319331 mut self,
320332 mut store: DiskStore,
321321- ) -> Result<(Commit, DiskDriver), DriveError> {
333333+ ) -> Result<(Commit, Option<DiskDriver>), DriveError> {
322334 // move store in and back out so we can manage lifetimes
323335 // dump mem blocks into the store
324336 store = tokio::task::spawn(async move {
···390402391403 let commit = self.commit.ok_or(DriveError::MissingCommit)?;
392404393393- let walker = Walker::new(commit.data);
405405+ // the commit always must point to a Node; empty node => empty MST special case
406406+ let db_bytes = store
407407+ .get(&commit.data.to_bytes())
408408+ .map_err(|e| DriveError::StorageError(DiskError::DbError(e)))?
409409+ .ok_or(DriveError::MissingCommit)?;
410410+411411+ let node: MstNode = match MaybeProcessedBlock::from_bytes(db_bytes.to_vec()) {
412412+ MaybeProcessedBlock::Processed(_) => Err(WalkError::BadCommitFingerprint)?,
413413+ MaybeProcessedBlock::Raw(bytes) => serde_ipld_dagcbor::from_slice(&bytes)?,
414414+ };
415415+ if node.is_empty() {
416416+ return Ok((commit, None));
417417+ }
418418+ let depth = node.depth.unwrap();
419419+420420+ let walker = Walker::new(commit.data, depth);
394421395422 Ok((
396423 commit,
397397- DiskDriver {
424424+ Some(DiskDriver {
398425 process: self.process,
399426 state: Some(BigState { store, walker }),
400400- },
427427+ }),
401428 ))
402429 }
403430}
···459486 return (state, Err(e.into()));
460487 }
461488 };
462462- match step {
463463- Step::Finish => break,
464464- Step::Found { rkey, data } => out.push((rkey, data)),
489489+ let Some(Output { rkey, cid: _, data }) = step else {
490490+ break;
465491 };
492492+ out.push((rkey, data));
466493 }
467494468495 (state, Ok::<_, DriveError>(out))
···499526 Err(e) => return tx.blocking_send(Err(e.into())),
500527 };
501528502502- match step {
503503- Step::Finish => return Ok(()),
504504- Step::Found { rkey, data } => {
505505- out.push((rkey, data));
506506- continue;
507507- }
529529+ let Some(Output { rkey, cid: _, data }) = step else {
530530+ break;
508531 };
532532+ out.push((rkey, data));
509533 }
510534511535 if out.is_empty() {
+74-23
src/mst.rs
···33//! The primary aim is to work through the **tree** structure. Non-node blocks
44//! are left as raw bytes, for upper levels to parse into DAG-CBOR or whatever.
5566+use sha2::{Digest, Sha256};
67use cid::Cid;
78use serde::Deserialize;
88-use crate::walk::Depth;
991010/// The top-level data object in a repository's tree is a signed commit.
1111#[derive(Debug, Deserialize)]
···3737 pub sig: serde_bytes::ByteBuf,
3838}
39394040-use serde::{de, de::{Deserializer, Visitor, MapAccess, SeqAccess}};
4040+use serde::de::{self, Deserializer, Visitor, MapAccess, SeqAccess, Unexpected};
4141use std::fmt;
42424343-pub(crate) enum NodeEntry {
4444- Value(Cid, Vec<u8>), // rkey
4545- Tree(Cid, u32), // depth
4343+pub type Depth = u32;
4444+4545+#[inline(always)]
4646+pub fn atproto_mst_depth(key: &str) -> Depth {
4747+ // 128 bits oughta be enough: https://bsky.app/profile/retr0.id/post/3jwwbf4izps24
4848+ u128::from_be_bytes(Sha256::digest(key).split_at(16).0.try_into().unwrap()).leading_zeros() / 2
4649}
47505151+#[derive(Debug)]
4852pub(crate) struct MstNode {
4949- pub left: Option<Cid>, // a tree but we don't know the depth
5050- pub entries: Vec<NodeEntry>,
5353+ pub depth: Option<Depth>, // known for nodes with entries (required for root)
5454+ pub things: Vec<NodeThing>,
5555+}
5656+5757+#[derive(Debug)]
5858+pub(crate) struct NodeThing {
5959+ pub(crate) cid: Cid,
6060+ pub(crate) kind: ThingKind,
6161+}
/// What a [`NodeThing`]'s CID points at.
#[derive(Debug)]
pub(crate) enum ThingKind {
    /// A child MST node.
    Tree,
    /// A record, stored under the fully reconstructed `rkey`.
    Value { rkey: String },
}
52685353-pub(crate) struct Entries(pub(crate) Vec<NodeEntry>);
6969+pub(crate) struct Entries(Vec<NodeThing>, Option<Depth>);
54705571impl<'de> Deserialize<'de> for Entries {
5672 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
···6985 where
7086 S: SeqAccess<'de>,
7187 {
7272- let mut children: Vec<NodeEntry> = Vec::with_capacity(seq.size_hint().unwrap_or(5));
8888+ let mut children: Vec<NodeThing> = Vec::with_capacity(seq.size_hint().unwrap_or(5));
7389 let mut prefix: Vec<u8> = vec![];
9090+ let mut depth = None;
7491 while let Some(entry) = seq.next_element::<Entry>()? {
7592 let mut rkey: Vec<u8> = vec![];
7693 let pre_checked = prefix
7794 .get(..entry.prefix_len)
7878- // .ok_or(MstError::EntryPrefixOutOfbounds)?;
7979- .ok_or_else(|| todo!()).unwrap();
9595+ .ok_or_else(|| de::Error::invalid_value(
9696+ Unexpected::Bytes(&prefix),
9797+ &"a prefix at least as long as the prefix_len",
9898+ ))?;
809981100 rkey.extend_from_slice(pre_checked);
82101 rkey.extend_from_slice(&entry.keysuffix);
8383- let depth = Depth::compute(&rkey);
841028585- prefix = rkey.clone();
103103+ let rkey_s = String::from_utf8(rkey.clone())
104104+ .map_err(|_| de::Error::invalid_value(
105105+ Unexpected::Bytes(&rkey),
106106+ &"a valid utf-8 rkey",
107107+ ))?;
108108+109109+ let key_depth = atproto_mst_depth(&rkey_s);
110110+ if depth.is_none() {
111111+ depth = Some(key_depth);
112112+ } else if Some(key_depth) != depth {
113113+ return Err(de::Error::invalid_value(
114114+ Unexpected::Bytes(&prefix),
115115+ &"all rkeys to have equal MST depth",
116116+ ));
117117+ }
861188787- children.push(NodeEntry::Value(entry.value, rkey));
119119+ children.push(NodeThing {
120120+ cid: entry.value,
121121+ kind: ThingKind::Value { rkey: rkey_s },
122122+ });
881238989- if let Some(ref tree) = entry.tree {
9090- children.push(NodeEntry::Tree(*tree, depth));
124124+ if let Some(cid) = entry.tree {
125125+ children.push(NodeThing {
126126+ cid,
127127+ kind: ThingKind::Tree,
128128+ });
91129 }
130130+131131+ prefix = rkey;
92132 }
9393- Ok(Entries(children))
133133+134134+ Ok(Entries(children, depth))
94135 }
95136 }
96137 deserializer.deserialize_seq(EntriesVisitor)
···117158 let mut found_left = false;
118159 let mut left = None;
119160 let mut found_entries = false;
120120- let mut entries = Vec::with_capacity(4); // "fanout of 4" so does this make sense????
161161+ let mut things = Vec::with_capacity(4); // "fanout of 4" so does this make sense????
162162+ let mut depth = None;
121163122164 while let Some(key) = map.next_key()? {
123165 match key {
···126168 return Err(de::Error::duplicate_field("l"));
127169 }
128170 found_left = true;
129129- left = map.next_value()?;
171171+ if let Some(cid) = map.next_value()? {
172172+ left = Some(NodeThing { cid, kind: ThingKind::Tree });
173173+ }
130174 }
131175 "e" => {
132176 if found_entries {
133177 return Err(de::Error::duplicate_field("e"));
134178 }
135179 found_entries = true;
136136- let mut child_entries: Entries = map.next_value()?;
137137- entries.append(&mut child_entries.0);
180180+ let Entries(mut child_entries, d) = map.next_value()?;
181181+ things.append(&mut child_entries);
182182+ depth = d;
138183 },
139184 f => return Err(de::Error::unknown_field(f, NODE_FIELDS))
140185 }
···145190 if !found_entries {
146191 return Err(de::Error::missing_field("e"));
147192 }
148148- Ok(MstNode { left, entries })
193193+194194+ things.reverse();
195195+ if let Some(l) = left {
196196+ things.push(l);
197197+ }
198198+199199+ Ok(MstNode { depth, things })
149200 }
150201 }
151202···156207157208impl MstNode {
158209 pub(crate) fn is_empty(&self) -> bool {
159159- self.left.is_none() && self.entries.is_empty()
210210+ self.things.is_empty()
160211 }
161212}
162213
+107-280
src/walk.rs
···11//! Depth-first MST traversal
2233-use crate::mst::NodeEntry;
33+use crate::mst::NodeThing;
44+use crate::mst::ThingKind;
45use crate::mst::MstNode;
66+use crate::mst::Depth;
57use crate::Bytes;
68use crate::HashMap;
79use crate::disk::DiskStore;
810use crate::drive::MaybeProcessedBlock;
911use cid::Cid;
1010-use sha2::{Digest, Sha256};
1112use std::convert::Infallible;
12131314/// Errors that can happen while walking
···2829/// Errors from invalid Rkeys
2930#[derive(Debug, PartialEq, thiserror::Error)]
3031pub enum MstError {
3131- #[error("Failed to compute an rkey due to invalid prefix_len")]
3232- EntryPrefixOutOfbounds,
3332 #[error("RKey was not utf-8")]
3433 EntryRkeyNotUtf8(#[from] std::string::FromUtf8Error),
3534 #[error("Nodes cannot be empty (except for an entirely empty MST)")]
3635 EmptyNode,
3737- #[error("Found an entry with rkey at the wrong depth")]
3838- WrongDepth,
3939- #[error("Lost track of our depth (possible bug?)")]
4040- LostDepth,
3636+ #[error("Expected node to be at depth {expected}, but it was at {depth}")]
3737+ WrongDepth { depth: Depth, expected: Depth },
4138 #[error("MST depth underflow: depth-0 node with child trees")]
4239 DepthUnderflow,
4343- #[error("Encountered an rkey out of order while walking the MST")]
4444- RkeyOutOfOrder,
4040+ #[error("Encountered rkey {rkey:?} which cannot follow the previous: {prev:?}")]
4141+ RkeyOutOfOrder { prev: String, rkey: String },
4542}
46434744/// Walker outputs
4848-#[derive(Debug)]
4949-pub enum Step {
5050- /// Reached the end of the MST! yay!
5151- Finish,
5252- /// A record was found!
5353- Found { rkey: String, data: Bytes },
5454-}
5555-5656-#[derive(Debug, Clone, PartialEq)]
5757-enum Need {
5858- Node { depth: Depth, cid: Cid },
5959- Record { rkey: String, cid: Cid },
6060-}
6161-6262-#[derive(Debug, Clone, Copy, PartialEq)]
6363-pub enum Depth {
6464- Root,
6565- Depth(u32),
6666-}
6767-6868-impl Depth {
6969- fn from_key(key: &[u8]) -> Self {
7070- let mut zeros = 0;
7171- for byte in Sha256::digest(key) {
7272- let leading = byte.leading_zeros();
7373- zeros += leading;
7474- if leading < 8 {
7575- break;
7676- }
7777- }
7878- Self::Depth(zeros / 2) // truncating divide (rounds down)
7979- }
8080- fn next_expected(&self) -> Result<Option<u32>, MstError> {
8181- match self {
8282- Self::Root => Ok(None),
8383- Self::Depth(d) => d.checked_sub(1).ok_or(MstError::DepthUnderflow).map(Some),
8484- }
8585- }
8686- pub fn compute(key: &[u8]) -> u32 {
8787- let Depth::Depth(d) = Self::from_key(key) else {
8888- panic!("errr");
8989- };
9090- d
9191- }
9292-}
9393-9494-fn push_from_node(stack: &mut Vec<Need>, node: &MstNode, parent_depth: Depth) -> Result<(), MstError> {
9595- // empty nodes are not allowed in the MST except in an empty MST
9696- if node.is_empty() {
9797- if parent_depth == Depth::Root {
9898- return Ok(()); // empty mst, nothing to push
9999- } else {
100100- return Err(MstError::EmptyNode);
101101- }
102102- }
103103-104104- let mut this_depth = parent_depth.next_expected()?;
105105-106106- for entry in node.entries.iter().rev() {
107107- // ok this loop sucks now esp with depth checking
108108- // should keep the entries together with a shared depth on the rkey
109109- // ...maybe. skipping the absent trees is nice?
110110- match entry {
111111- NodeEntry::Value(cid, rkey) => {
112112- stack.push(Need::Record {
113113- rkey: String::from_utf8(rkey.to_vec())?,
114114- cid: *cid,
115115- });
116116- }
117117- NodeEntry::Tree(cid, depth) => {
118118- if let Some(expected) = this_depth {
119119- if *depth != expected {
120120- return Err(MstError::WrongDepth);
121121- }
122122- } else {
123123- // this_depth is `none` if we are the deepest child (directly below root)
124124- // in that case we accept whatever highest depth is claimed
125125- this_depth = Some(*depth);
126126- }
127127- stack.push(Need::Node {
128128- depth: Depth::Depth(*depth),
129129- cid: *cid,
130130- });
131131- }
132132- }
133133-134134- }
135135-136136- let d = this_depth.ok_or(MstError::LostDepth)?;
137137- if let Some(tree) = node.left {
138138- stack.push(Need::Node {
139139- depth: Depth::Depth(d),
140140- cid: tree,
141141- });
142142- }
143143- Ok(())
4545+#[derive(Debug, PartialEq)]
4646+pub struct Output {
4747+ pub rkey: String,
4848+ pub cid: Cid,
4949+ pub data: Bytes,
14450}
1455114652/// Traverser of an atproto MST
···14854/// Walks the tree from left-to-right in depth-first order
14955#[derive(Debug)]
15056pub struct Walker {
151151- stack: Vec<Need>,
152152- prev: String,
5757+ prev_rkey: String,
5858+ todo: Vec<(Depth, NodeThing)>,
15359}
1546015561impl Walker {
156156- pub fn new(tree_root_cid: Cid) -> Self {
6262+ pub fn new(
6363+ root_cid: Cid,
6464+ depth: Depth,
6565+ ) -> Self {
15766 Self {
158158- stack: vec![Need::Node {
159159- depth: Depth::Root,
160160- cid: tree_root_cid,
161161- }],
162162- prev: "".to_string(),
6767+ prev_rkey: "".to_string(),
6868+ todo: vec![(
6969+ depth + 1, // we're kind of inventing a fake root one above the real root
7070+ // ... maybe we should just pass in the real root here???
7171+ NodeThing {
7272+ cid: root_cid,
7373+ kind: ThingKind::Tree,
7474+ },
7575+ )],
16376 }
16477 }
16578166166- /// Advance through nodes until we find a record or can't go further
167167- pub fn step(
7979+ fn mpb_step(
16880 &mut self,
169169- blocks: &mut HashMap<Cid, MaybeProcessedBlock>,
8181+ depth: Depth,
8282+ kind: ThingKind,
8383+ cid: Cid,
8484+ mpb: &MaybeProcessedBlock,
17085 process: impl Fn(Bytes) -> Bytes,
171171- ) -> Result<Step, WalkError> {
172172- loop {
173173- let Some(need) = self.stack.last_mut() else {
174174- log::trace!("tried to walk but we're actually done.");
175175- return Ok(Step::Finish);
176176- };
8686+ ) -> Result<Option<Output>, WalkError> {
8787+ match kind {
8888+ ThingKind::Value { rkey } => {
8989+ let data = match mpb {
9090+ MaybeProcessedBlock::Raw(data) => process(data.clone()),
9191+ MaybeProcessedBlock::Processed(t) => t.clone(),
9292+ };
17793178178- match need {
179179- &mut Need::Node { depth, cid } => {
180180- log::trace!("need node {cid:?}");
181181- let Some(block) = blocks.remove(&cid) else {
182182- return Err(WalkError::MissingBlock(cid));
183183- };
9494+ if rkey <= self.prev_rkey {
9595+ return Err(WalkError::MstError(MstError::RkeyOutOfOrder {
9696+ rkey,
9797+ prev: self.prev_rkey.clone(),
9898+ }));
9999+ }
100100+ self.prev_rkey = rkey.clone();
184101185185- let MaybeProcessedBlock::Raw(data) = block else {
186186- return Err(WalkError::BadCommitFingerprint);
187187- };
188188- let node = serde_ipld_dagcbor::from_slice::<crate::mst::MstNode>(&data)
189189- .map_err(WalkError::BadCommit)?;
102102+ Ok(Some(Output {
103103+ rkey,
104104+ cid,
105105+ data,
106106+ }))
107107+ }
108108+ ThingKind::Tree => {
109109+ let MaybeProcessedBlock::Raw(data) = mpb else {
110110+ return Err(WalkError::BadCommitFingerprint);
111111+ };
190112191191- // found node, make sure we remember
192192- self.stack.pop();
113113+ let node: MstNode = serde_ipld_dagcbor::from_slice(&data)
114114+ .map_err(WalkError::BadCommit)?;
193115194194- // queue up work on the found node next
195195- push_from_node(&mut self.stack, &node, depth)?;
116116+ if node.is_empty() {
117117+ return Err(WalkError::MstError(MstError::EmptyNode));
196118 }
197197- Need::Record { rkey, cid } => {
198198- log::trace!("need record {cid:?}");
199199- // note that we cannot *remove* a record block, sadly, since
200200- // there can be multiple rkeys pointing to the same cid.
201201- let Some(data) = blocks.get(cid) else {
202202- return Err(WalkError::MissingBlock(*cid));
203203- };
204204- let rkey = rkey.clone();
205205- let data = match data {
206206- MaybeProcessedBlock::Raw(data) => process(data.clone()),
207207- MaybeProcessedBlock::Processed(t) => t.clone(),
208208- };
209119210210- // found node, make sure we remember
211211- self.stack.pop();
212212-213213- // rkeys *must* be in order or else the tree is invalid (or
214214- // we have a bug)
215215- if rkey <= self.prev {
216216- return Err(MstError::RkeyOutOfOrder)?;
120120+ let next_depth = depth.checked_sub(1).ok_or(MstError::DepthUnderflow)?;
121121+ if let Some(d) = node.depth {
122122+ if d != next_depth {
123123+ return Err(WalkError::MstError(MstError::WrongDepth {
124124+ depth: d,
125125+ expected: next_depth,
126126+ }));
217127 }
218218- self.prev = rkey.clone();
128128+ }
219129220220- return Ok(Step::Found { rkey, data });
130130+ for thing in node.things {
131131+ self.todo.push((next_depth, thing));
221132 }
133133+134134+ Ok(None)
222135 }
223136 }
224137 }
225138226226- /// blocking!!!!!!
227227- pub fn disk_step(
139139+ /// Advance through nodes until we find a record or can't go further
140140+ pub fn step(
228141 &mut self,
229229- reader: &mut DiskStore,
142142+ blocks: &mut HashMap<Cid, MaybeProcessedBlock>,
230143 process: impl Fn(Bytes) -> Bytes,
231231- ) -> Result<Step, WalkError> {
232232- loop {
233233- let Some(need) = self.stack.last_mut() else {
234234- log::trace!("tried to walk but we're actually done.");
235235- return Ok(Step::Finish);
144144+ ) -> Result<Option<Output>, WalkError> {
145145+146146+ while let Some((depth, NodeThing { cid, kind })) = self.todo.pop() {
147147+ let Some(mpb) = blocks.get(&cid) else {
148148+ return Err(WalkError::MissingBlock(cid));
236149 };
237237-238238- match need {
239239- &mut Need::Node { depth, cid } => {
240240- let cid_bytes = cid.to_bytes();
241241- log::trace!("need node {cid:?}");
242242- let Some(block_slice) = reader.get(&cid_bytes)? else {
243243- return Err(WalkError::MissingBlock(cid));
244244- };
245245-246246- let block = MaybeProcessedBlock::from_bytes(block_slice.to_vec());
150150+ if let Some(out) = self.mpb_step(depth, kind, cid, mpb, &process)? {
151151+ return Ok(Some(out));
152152+ }
153153+ }
247154248248- let MaybeProcessedBlock::Raw(data) = block else {
249249- return Err(WalkError::BadCommitFingerprint);
250250- };
251251- let node = serde_ipld_dagcbor::from_slice::<MstNode>(&data)
252252- .map_err(WalkError::BadCommit)?;
155155+ log::trace!("tried to walk but we're actually done.");
156156+ Ok(None)
157157+ }
253158254254- // found node, make sure we remember
255255- self.stack.pop();
159159+ /// blocking!!!!!!
160160+ pub fn disk_step(
161161+ &mut self,
162162+ blocks: &mut DiskStore,
163163+ process: impl Fn(Bytes) -> Bytes,
164164+ ) -> Result<Option<Output>, WalkError> {
256165257257- // queue up work on the found node next
258258- push_from_node(&mut self.stack, &node, depth).map_err(WalkError::MstError)?;
259259- }
260260- Need::Record { rkey, cid } => {
261261- log::trace!("need record {cid:?}");
262262- let cid_bytes = cid.to_bytes();
263263- let Some(data_slice) = reader.get(&cid_bytes)? else {
264264- return Err(WalkError::MissingBlock(*cid));
265265- };
266266- let data = MaybeProcessedBlock::from_bytes(data_slice.to_vec());
267267- let rkey = rkey.clone();
268268- let data = match data {
269269- MaybeProcessedBlock::Raw(data) => process(data),
270270- MaybeProcessedBlock::Processed(t) => t,
271271- };
272272-273273- // found node, make sure we remember
274274- self.stack.pop();
275275-276276- log::trace!("emitting a block as a step. depth={}", self.stack.len());
277277-278278- // rkeys *must* be in order or else the tree is invalid (or
279279- // we have a bug)
280280- if rkey <= self.prev {
281281- return Err(MstError::RkeyOutOfOrder)?;
282282- }
283283- self.prev = rkey.clone();
284284-285285- return Ok(Step::Found { rkey, data });
286286- }
166166+ while let Some((depth, NodeThing { cid, kind })) = self.todo.pop() {
167167+ let Some(block_slice) = blocks.get(&cid.to_bytes())? else {
168168+ return Err(WalkError::MissingBlock(cid));
169169+ };
170170+ let mpb = MaybeProcessedBlock::from_bytes(block_slice.to_vec());
171171+ if let Some(out) = self.mpb_step(depth, kind, cid, &mpb, &process)? {
172172+ return Ok(Some(out));
287173 }
288174 }
175175+ log::trace!("tried to walk but we're actually done.");
176176+ Ok(None)
289177 }
290178}
291179···293181mod test {
294182 use super::*;
295183296296- fn cid1() -> Cid {
297297- "bafyreihixenvk3ahqbytas4hk4a26w43bh6eo3w6usjqtxkpzsvi655a3m"
298298- .parse()
299299- .unwrap()
300300- }
301301-302302- #[test]
303303- fn test_depth_spec_0() {
304304- let d = Depth::from_key(b"2653ae71");
305305- assert_eq!(d, Depth::Depth(0))
306306- }
307307-308308- #[test]
309309- fn test_depth_spec_1() {
310310- let d = Depth::from_key(b"blue");
311311- assert_eq!(d, Depth::Depth(1))
312312- }
313313-314314- #[test]
315315- fn test_depth_spec_4() {
316316- let d = Depth::from_key(b"app.bsky.feed.post/454397e440ec");
317317- assert_eq!(d, Depth::Depth(4))
318318- }
319319-320320- #[test]
321321- fn test_depth_spec_8() {
322322- let d = Depth::from_key(b"app.bsky.feed.post/9adeb165882c");
323323- assert_eq!(d, Depth::Depth(8))
324324- }
325325-326326- #[test]
327327- fn test_depth_ietf_draft_0() {
328328- let d = Depth::from_key(b"key1");
329329- assert_eq!(d, Depth::Depth(0))
330330- }
331331-332332- #[test]
333333- fn test_depth_ietf_draft_1() {
334334- let d = Depth::from_key(b"key7");
335335- assert_eq!(d, Depth::Depth(1))
336336- }
337337-338338- #[test]
339339- fn test_depth_ietf_draft_4() {
340340- let d = Depth::from_key(b"key515");
341341- assert_eq!(d, Depth::Depth(4))
342342- }
343343-344344- #[test]
345345- fn test_depth_interop() {
346346- // examples from https://github.com/bluesky-social/atproto-interop-tests/blob/main/mst/key_heights.json
347347- for (k, expected) in [
348348- ("", 0),
349349- ("asdf", 0),
350350- ("blue", 1),
351351- ("2653ae71", 0),
352352- ("88bfafc7", 2),
353353- ("2a92d355", 4),
354354- ("884976f5", 6),
355355- ("app.bsky.feed.post/454397e440ec", 4),
356356- ("app.bsky.feed.post/9adeb165882c", 8),
357357- ] {
358358- let d = Depth::from_key(k.as_bytes());
359359- assert_eq!(d, Depth::Depth(expected), "key: {}", k);
360360- }
361361- }
184184+ // fn cid1() -> Cid {
185185+ // "bafyreihixenvk3ahqbytas4hk4a26w43bh6eo3w6usjqtxkpzsvi655a3m"
186186+ // .parse()
187187+ // .unwrap()
188188+ // }
362189363190 // #[test]
364191 // fn test_push_empty_fails() {