···6565 }
6666 };
67676868- let Some(driver) = driver else {
6969- panic!("big car but somehow empty MST: is the archive stuffed with garbage?");
7070- };
7171-7268 // collect some random stats about the blocks
7369 let mut n = 0;
7470 let mut zeros = 0;
+4-6
examples/read-file/main.rs
···2323 let reader = tokio::fs::File::open(file).await?;
2424 let reader = tokio::io::BufReader::new(reader);
25252626- let (commit, driver) = match DriverBuilder::new()
2626+ let (commit, mut driver) = match DriverBuilder::new()
2727 .with_block_processor(|block| block.len().to_ne_bytes().to_vec())
2828 .load_car(reader)
2929 .await?
···3535 log::info!("got commit: {commit:?}");
36363737 let mut n = 0;
3838- if let Some(mut driver) = driver {
3939- while let Some(pairs) = driver.next_chunk(256).await? {
4040- n += pairs.len();
4141- // log::info!("got {rkey:?}");
4242- }
3838+ while let Some(pairs) = driver.next_chunk(256).await? {
3939+ n += pairs.len();
4040+ // log::info!("got {rkey:?}");
4341 }
4442 log::info!("bye! total records={n}");
4543
+7-7
readme.md
···7474- 450MiB CAR file (huge): `1.3s`
7575- 128MiB (huge): `350ms`
7676- 5.0MiB: `6.8ms`
7777-- 279KiB: `170us`
7878-- 3.4KiB: `5.2us`
7979-- empty: `670ns`
7777+- 279KiB: `160us`
7878+- 3.4KiB: `5.1us`
7979+- empty: `690ns`
80808181it's a little faster with `mimalloc`
8282···88888989- 450MiB CAR file: `1.2s` (-8%)
9090- 128MiB: `300ms` (-14%)
9191-- 5.0MiB: `6.0ms` (-12%)
9292-- 279KiB: `140us` (-21%)
9393-- 3.4KiB: `4.7us` (-10%)
9494-- empty: `640ns` (-4%)
9191+- 5.0MiB: `6.0ms` (-11%)
9292+- 279KiB: `150us` (-7%)
9393+- 3.4KiB: `4.7us` (-8%)
9494+- empty: `670ns` (-4%)
95959696processing CARs requires buffering blocks, so it can consume a lot of memory. repo-stream's in-memory driver has minimal memory overhead, but there are two ways to make it work with less memory (you can do either or both!)
9797
+8-13
src/drive.rs
···107107 ///
108108 /// You probably want to check the commit's signature. You can go ahead and
109109 /// walk the MST right away.
110110- Memory(Commit, Option<MemDriver>),
110110+ Memory(Commit, MemDriver),
111111 /// Blocks exceed the memory limit
112112 ///
113113 /// You'll need to provide a disk storage to continue. The commit will be
···233233 MaybeProcessedBlock::Processed(_) => Err(WalkError::BadCommitFingerprint)?,
234234 MaybeProcessedBlock::Raw(bytes) => serde_ipld_dagcbor::from_slice(bytes)?,
235235 };
236236- let Some(walker) = Walker::new(root_node) else {
237237- // TODO: actually we still want the commit in this case
238238- return Ok(Driver::Memory(commit, None));
239239- };
236236+ let walker = Walker::new(root_node);
240237241238 Ok(Driver::Memory(
242239 commit,
243243- Some(MemDriver {
240240+ MemDriver {
244241 blocks: mem_blocks,
245242 walker,
246243 process,
247247- }),
244244+ },
248245 ))
249246 }
250247}
···304301 pub async fn finish_loading(
305302 mut self,
306303 mut store: DiskStore,
307307- ) -> Result<(Commit, Option<DiskDriver>), DriveError> {
304304+ ) -> Result<(Commit, DiskDriver), DriveError> {
308305 // move store in and back out so we can manage lifetimes
309306 // dump mem blocks into the store
310307 store = tokio::task::spawn(async move {
···386383 MaybeProcessedBlock::Processed(_) => Err(WalkError::BadCommitFingerprint)?,
387384 MaybeProcessedBlock::Raw(bytes) => serde_ipld_dagcbor::from_slice(&bytes)?,
388385 };
389389- let Some(walker) = Walker::new(node) else {
390390- return Ok((commit, None));
391391- };
386386+ let walker = Walker::new(node);
392387393388 Ok((
394389 commit,
395395- Some(DiskDriver {
390390+ DiskDriver {
396391 process: self.process,
397392 state: Some(BigState { store, walker }),
398398- }),
393393+ },
399394 ))
400395 }
401396}