···5858```
59596060more recent todo
6161-6262-- [ ] get an *emtpy* car for the test suite
6161+- [ ] repo car slices
6262+- [ ] lazy-value stream (rkey -> CID diffing for tap-like `#sync` handling)
6363+- [x] get an *empty* car for the test suite
6364- [x] implement a max size on disk limit
64656566···70717172current car processing times (records processed into their length usize, phil's dev machine):
72737373-- 128MiB CAR file: `347ms`
7474-- 5.0MiB: `6.1ms`
7575-- 279KiB: `139us`
7676-- 3.4KiB: `4.9us`
7474+- 128MiB CAR file: `350ms`
7575+- 5.0MiB: `6.8ms`
7676+- 279KiB: `170us`
7777+- 3.4KiB: `5.2us`
7878+- empty: `710ns`
7979+8080+it's a little faster with `mimalloc`
8181+8282+```rust
8383+use mimalloc::MiMalloc;
8484+#[global_allocator]
8585+static GLOBAL: MiMalloc = MiMalloc;
8686+```
8787+8888+- 128MiB CAR file: `310ms` (-13%)
8989+- 5.0MiB: `6.1ms` (-10%)
9090+- 279KiB: `160us` (-5%)
9191+- 3.4KiB: `5.7us` (-9%)
9292+- empty: `660ns` (-7%)
779378947995running the huge-car benchmark
+5-11
src/drive.rs
···254254 let commit = commit.ok_or(DriveError::MissingCommit)?;
255255256256 // the commit always must point to a Node; empty node => empty MST special case
257257- let node: MstNode = match mem_blocks.get(&commit.data).ok_or(DriveError::MissingCommit)? {
257257+ let root_node: MstNode = match mem_blocks.get(&commit.data).ok_or(DriveError::MissingCommit)? {
258258 MaybeProcessedBlock::Processed(_) => Err(WalkError::BadCommitFingerprint)?,
259259 MaybeProcessedBlock::Raw(bytes) => serde_ipld_dagcbor::from_slice(bytes)?,
260260 };
261261- if node.is_empty() {
261261+ let Some(walker) = Walker::new(root_node) else {
262262 // TODO: actually we still want the commit in this case
263263 return Ok(None);
264264- }
265265- let depth = node.depth.unwrap();
266266-267267- let walker = Walker::new(commit.data, depth);
264264+ };
268265269266 Ok(Some(Driver::Memory(
270267 commit,
···412409 MaybeProcessedBlock::Processed(_) => Err(WalkError::BadCommitFingerprint)?,
413410 MaybeProcessedBlock::Raw(bytes) => serde_ipld_dagcbor::from_slice(&bytes)?,
414411 };
415415- if node.is_empty() {
412412+ let Some(walker) = Walker::new(node) else {
416413 return Ok((commit, None));
417417- }
418418- let depth = node.depth.unwrap();
419419-420420- let walker = Walker::new(commit.data, depth);
414414+ };
421415422416 Ok((
423417 commit,
+28-27
src/walk.rs
···5555#[derive(Debug)]
5656pub struct Walker {
5757 prev_rkey: String,
5858- todo: Vec<(Depth, NodeThing)>,
5858+ root_depth: Depth,
5959+ todo: Vec<Vec<NodeThing>>,
5960}
60616162impl Walker {
6263 pub fn new(
6363- root_cid: Cid,
6464- depth: Depth,
6565- ) -> Self {
6666- Self {
6464+ root_node: MstNode,
6565+ ) -> Option<Self> {
6666+ Some(Self {
6767 prev_rkey: "".to_string(),
6868- todo: vec![(
6969- depth + 1, // we're kind of inventing a fake root one above the real root
7070- // ... maybe we should just pass in the real root here???
7171- NodeThing {
7272- cid: root_cid,
7373- kind: ThingKind::Tree,
7474- },
7575- )],
6868+ root_depth: root_node.depth?,
6969+ todo: vec![root_node.things],
7070+ })
7171+ }
7272+7373+ fn next_todo(&mut self) -> Option<NodeThing> {
7474+ while let Some(last) = self.todo.last_mut() {
7575+ let Some(thing) = last.pop() else {
7676+ self.todo.pop();
7777+ continue;
7878+ };
7979+ return Some(thing);
7680 }
8181+ None
7782 }
78837984 fn mpb_step(
8085 &mut self,
8181- depth: Depth,
8286 kind: ThingKind,
8387 cid: Cid,
8488 mpb: &MaybeProcessedBlock,
···99103 }
100104 self.prev_rkey = rkey.clone();
101105106106+ log::trace!("val @ {rkey}");
102107 Ok(Some(Output {
103108 rkey,
104109 cid,
···117122 return Err(WalkError::MstError(MstError::EmptyNode));
118123 }
119124120120- let next_depth = depth.checked_sub(1).ok_or(MstError::DepthUnderflow)?;
125125+ let current_depth = self.root_depth - (self.todo.len() - 1) as u32;
126126+ let next_depth = current_depth.checked_sub(1).ok_or(MstError::DepthUnderflow)?;
121127 if let Some(d) = node.depth {
122128 if d != next_depth {
123129 return Err(WalkError::MstError(MstError::WrongDepth {
···127133 }
128134 }
129135130130- for thing in node.things {
131131- self.todo.push((next_depth, thing));
132132- }
133133-136136+ log::trace!("node into depth {next_depth}");
137137+ self.todo.push(node.things);
134138 Ok(None)
135139 }
136140 }
···143147 process: impl Fn(Bytes) -> Bytes,
144148 ) -> Result<Option<Output>, WalkError> {
145149146146- while let Some((depth, NodeThing { cid, kind })) = self.todo.pop() {
150150+ while let Some(NodeThing { cid, kind }) = self.next_todo() {
147151 let Some(mpb) = blocks.get(&cid) else {
148152 return Err(WalkError::MissingBlock(cid));
149153 };
150150- if let Some(out) = self.mpb_step(depth, kind, cid, mpb, &process)? {
154154+155155+ if let Some(out) = self.mpb_step(kind, cid, mpb, &process)? {
151156 return Ok(Some(out));
152157 }
153158 }
154154-155155- log::trace!("tried to walk but we're actually done.");
156159 Ok(None)
157160 }
158161···162165 blocks: &mut DiskStore,
163166 process: impl Fn(Bytes) -> Bytes,
164167 ) -> Result<Option<Output>, WalkError> {
165165-166166- while let Some((depth, NodeThing { cid, kind })) = self.todo.pop() {
168168+ while let Some(NodeThing { cid, kind }) = self.next_todo() {
167169 let Some(block_slice) = blocks.get(&cid.to_bytes())? else {
168170 return Err(WalkError::MissingBlock(cid));
169171 };
170172 let mpb = MaybeProcessedBlock::from_bytes(block_slice.to_vec());
171171- if let Some(out) = self.mpb_step(depth, kind, cid, &mpb, &process)? {
173173+ if let Some(out) = self.mpb_step(kind, cid, &mpb, &process)? {
172174 return Ok(Some(out));
173175 }
174176 }
175175- log::trace!("tried to walk but we're actually done.");
176177 Ok(None)
177178 }
178179}