···3333 let reader = tokio::io::BufReader::new(reader);
34343535 let mut driver = match Driver::load_car(reader, ser, 1024).await.unwrap() {
3636- Driver::Memory(_, mem_driver) => mem_driver,
3636+ Driver::Memory(_, _, mem_driver) => mem_driver,
3737 Driver::Disk(_) => panic!("not doing disk for benchmark"),
3838 };
3939
+3-3
benches/non-huge-cars.rs
···11extern crate repo_stream;
22-use repo_stream::Driver;
22+use repo_stream::{Driver, Step};
3344use criterion::{Criterion, criterion_group, criterion_main};
55···40404141async fn drive_car(bytes: &[u8]) -> usize {
4242 let mut driver = match Driver::load_car(bytes, ser, 32).await.unwrap() {
4343- Driver::Memory(_, mem_driver) => mem_driver,
4343+ Driver::Memory(_, _, mem_driver) => mem_driver,
4444 Driver::Disk(_) => panic!("not benching big cars here"),
4545 };
46464747 let mut n = 0;
4848- while let Some(pairs) = driver.next_chunk(256).await.unwrap() {
4848+ while let Step::Value(pairs) = driver.next_chunk(256).await.unwrap() {
4949 n += pairs.len();
5050 }
5151 n
+10-7
examples/disk-read-file/main.rs
···99static GLOBAL: MiMalloc = MiMalloc;
10101111use clap::Parser;
1212-use repo_stream::{DiskBuilder, Driver, DriverBuilder};
1212+use repo_stream::{DiskBuilder, Driver, DriverBuilder, Step};
1313use std::path::PathBuf;
1414use std::time::Instant;
1515···4242 .load_car(reader)
4343 .await?
4444 {
4545- Driver::Memory(_, _) => panic!("try this on a bigger car"),
4545+ Driver::Memory(_, _, _) => panic!("try this on a bigger car"),
4646 Driver::Disk(big_stuff) => {
4747 // we reach here if the repo was too big and needs to be spilled to
4848 // disk to continue
···5151 let disk_store = DiskBuilder::new().open(tmpfile).await?;
52525353 // do the spilling, get back a (similar) driver
5454- let (commit, driver) = big_stuff.finish_loading(disk_store).await?;
5454+ let (commit, _, driver) = big_stuff.finish_loading(disk_store).await?;
55555656 // at this point you might want to fetch the account's signing key
5757 // via the DID from the commit, and then verify the signature.
···7474 // this example uses the disk driver's channel mode: the tree walking is
7575 // spawned onto a blocking thread, and we get chunks of rkey+blocks back
7676 let (mut rx, join) = driver.to_channel(512);
7777- while let Some(r) = rx.recv().await {
7878- let pairs = r?;
7777+ while let Some(step) = rx.recv().await {
7878+ let step = step?;
7979+ let Step::Value(outputs) = step else {
8080+ break;
8181+ };
79828083 // keep a count of the total number of blocks seen
8181- n += pairs.len();
8484+ n += outputs.len();
82858383- for output in pairs {
8686+ for output in outputs {
8487 // for each block, count how many bytes are equal to '0'
8588 // (this is just an example, you probably want to do something more
8689 // interesting)
+3-3
examples/read-file/main.rs
···4455extern crate repo_stream;
66use clap::Parser;
77-use repo_stream::{Driver, DriverBuilder};
77+use repo_stream::{Driver, DriverBuilder, Step};
88use std::path::PathBuf;
991010type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
···2828 .load_car(reader)
2929 .await?
3030 {
3131- Driver::Memory(commit, mem_driver) => (commit, mem_driver),
3131+ Driver::Memory(commit, _, mem_driver) => (commit, mem_driver),
3232 Driver::Disk(_) => panic!("this example doesn't handle big CARs"),
3333 };
34343535 log::info!("got commit: {commit:?}");
36363737 let mut n = 0;
3838- while let Some(pairs) = driver.next_chunk(256).await? {
3838+ while let Step::Value(pairs) = driver.next_chunk(256).await? {
3939 n += pairs.len();
4040 // log::info!("got {rkey:?}");
4141 }
+5-5
readme.md
···1111[sponsor-badge]: https://img.shields.io/badge/at-microcosm-b820f9?labelColor=b820f9&logo=githubsponsors&logoColor=fff
12121313```rust no_run
1414-use repo_stream::{Driver, DriverBuilder, DriveError, DiskBuilder, Output};
1414+use repo_stream::{Driver, DriverBuilder, DriveError, DiskBuilder, Output, Step};
15151616#[tokio::main]
1717async fn main() -> Result<(), Box<dyn std::error::Error>> {
···3131 {
32323333 // if all blocks fit within memory
3434- Driver::Memory(_commit, mut driver) => {
3535- while let Some(chunk) = driver.next_chunk(256).await? {
3434+ Driver::Memory(_commit, _prev_rkey, mut driver) => {
3535+ while let Step::Value(chunk) = driver.next_chunk(256).await? {
3636 for Output { rkey: _, cid: _, data } in chunk {
3737 let size = usize::from_ne_bytes(data.try_into().unwrap());
3838 total_size += size;
···4545 // set up a disk store we can spill to
4646 let store = DiskBuilder::new().open("some/path.db".into()).await?;
4747 // do the spilling, get back a (similar) driver
4848- let (_commit, mut driver) = paused.finish_loading(store).await?;
4848+ let (_commit, _prev_rkey, mut driver) = paused.finish_loading(store).await?;
49495050- while let Some(chunk) = driver.next_chunk(256).await? {
5050+ while let Step::Value(chunk) = driver.next_chunk(256).await? {
5151 for Output { rkey: _, cid: _, data } in chunk {
5252 let size = usize::from_ne_bytes(data.try_into().unwrap());
5353 total_size += size;
+26-23
src/drive.rs
···11//! Consume a CAR from an AsyncRead, producing an ordered stream of records
2233use crate::{
44- Bytes, HashMap,
44+ Bytes, HashMap, Rkey, Step,
55 disk::{DiskError, DiskStore},
66 mst::MstNode,
77 walk::Output,
···107107 ///
108108 /// You probably want to check the commit's signature. You can go ahead and
109109 /// walk the MST right away.
110110- Memory(Commit, MemDriver),
110110+ Memory(Commit, Option<Rkey>, MemDriver),
111111 /// Blocks exceed the memory limit
112112 ///
113113 /// You'll need to provide a disk storage to continue. The commit will be
···237237238238 Ok(Driver::Memory(
239239 commit,
240240+ None,
240241 MemDriver {
241242 blocks: mem_blocks,
242243 walker,
···268269269270impl MemDriver {
270271 /// Step through the record outputs, in rkey order
271271- pub async fn next_chunk(&mut self, n: usize) -> Result<Option<BlockChunk>, DriveError> {
272272+ pub async fn next_chunk(&mut self, n: usize) -> Result<Step<BlockChunk>, DriveError> {
272273 let mut out = Vec::with_capacity(n);
273274 for _ in 0..n {
274275 // walk as far as we can until we run out of blocks or find a record
275275- let Some(output) = self.walker.step(&mut self.blocks, self.process)? else {
276276+ let Step::Value(output) = self.walker.step(&mut self.blocks, self.process)? else {
276277 break;
277278 };
278279 out.push(output);
279280 }
280281 if out.is_empty() {
281281- Ok(None)
282282+ Ok(Step::End(None))
282283 } else {
283283- Ok(Some(out))
284284+ Ok(Step::Value(out))
284285 }
285286 }
286287}
···299300 pub async fn finish_loading(
300301 mut self,
301302 mut store: DiskStore,
302302- ) -> Result<(Commit, DiskDriver), DriveError> {
303303+ ) -> Result<(Commit, Option<Rkey>, DiskDriver), DriveError> {
303304 // move store in and back out so we can manage lifetimes
304305 // dump mem blocks into the store
305306 store = tokio::task::spawn(async move {
···385386386387 Ok((
387388 commit,
389389+ None,
388390 DiskDriver {
389391 process: self.process,
390392 state: Some(BigState { store, walker }),
···417419 /// Walk the MST returning up to `n` rkey + record pairs
418420 ///
419421 /// ```no_run
420420- /// # use repo_stream::{drive::{DiskDriver, DriveError, _get_fake_disk_driver}, noop};
422422+ /// # use repo_stream::{drive::{DiskDriver, DriveError, _get_fake_disk_driver}, Step, noop};
421423 /// # #[tokio::main]
422424 /// # async fn main() -> Result<(), DriveError> {
423425 /// # let mut disk_driver = _get_fake_disk_driver();
424424- /// while let Some(pairs) = disk_driver.next_chunk(256).await? {
425425- /// for output in pairs {
426426+ /// while let Step::Value(outputs) = disk_driver.next_chunk(256).await? {
427427+ /// for output in outputs {
426428 /// println!("{}: size={}", output.rkey, output.data.len());
427429 /// }
428430 /// }
429431 /// # Ok(())
430432 /// # }
431433 /// ```
432432- pub async fn next_chunk(&mut self, n: usize) -> Result<Option<BlockChunk>, DriveError> {
434434+ pub async fn next_chunk(&mut self, n: usize) -> Result<Step<Vec<Output>>, DriveError> {
433435 let process = self.process;
434436435437 // state should only *ever* be None transiently while inside here
···450452 return (state, Err(e.into()));
451453 }
452454 };
453453- let Some(output) = step else {
455455+ let Step::Value(output) = step else {
454456 break;
455457 };
456458 out.push(output);
···466468 let out = res?;
467469468470 if out.is_empty() {
469469- Ok(None)
471471+ Ok(Step::End(None))
470472 } else {
471471- Ok(Some(out))
473473+ Ok(Step::Value(out))
472474 }
473475 }
474476475477 fn read_tx_blocking(
476478 &mut self,
477479 n: usize,
478478- tx: mpsc::Sender<Result<BlockChunk, DriveError>>,
479479- ) -> Result<(), mpsc::error::SendError<Result<BlockChunk, DriveError>>> {
480480+ tx: mpsc::Sender<Result<Step<BlockChunk>, DriveError>>,
481481+ ) -> Result<(), mpsc::error::SendError<Result<Step<BlockChunk>, DriveError>>> {
480482 let BigState { store, walker } = self.state.as_mut().expect("valid state");
481483482484 loop {
···490492 Err(e) => return tx.blocking_send(Err(e.into())),
491493 };
492494493493- let Some(output) = step else {
495495+ let Step::Value(output) = step else {
494496 break;
495497 };
496498 out.push(output);
···499501 if out.is_empty() {
500502 break;
501503 }
502502- tx.blocking_send(Ok(out))?;
504504+ tx.blocking_send(Ok(Step::Value(out)))?;
503505 }
504506505507 Ok(())
···516518 /// benefit over just using `.next_chunk(n)`.
517519 ///
518520 /// ```no_run
519519- /// # use repo_stream::{drive::{DiskDriver, DriveError, _get_fake_disk_driver}, noop};
521521+ /// # use repo_stream::{drive::{DiskDriver, DriveError, _get_fake_disk_driver}, Step, noop};
520522 /// # #[tokio::main]
521523 /// # async fn main() -> Result<(), DriveError> {
522524 /// # let mut disk_driver = _get_fake_disk_driver();
523525 /// let (mut rx, join) = disk_driver.to_channel(512);
524526 /// while let Some(recvd) = rx.recv().await {
525525- /// let pairs = recvd?;
526526- /// for output in pairs {
527527+ /// let outputs = recvd?;
528528+ /// let Step::Value(outputs) = outputs else { break; };
529529+ /// for output in outputs {
527530 /// println!("{}: size={}", output.rkey, output.data.len());
528531 /// }
529532 ///
···535538 mut self,
536539 n: usize,
537540 ) -> (
538538- mpsc::Receiver<Result<BlockChunk, DriveError>>,
541541+ mpsc::Receiver<Result<Step<BlockChunk>, DriveError>>,
539542 tokio::task::JoinHandle<Self>,
540543 ) {
541541- let (tx, rx) = mpsc::channel::<Result<BlockChunk, DriveError>>(1);
544544+ let (tx, rx) = mpsc::channel::<Result<Step<BlockChunk>, DriveError>>(1);
542545543546 // sketch: this worker is going to be allowed to execute without a join handle
544547 let chan_task = tokio::task::spawn_blocking(move || {
+6-6
src/lib.rs
···1818`iroh_car` additionally applies a block size limit of `2MiB`.
19192020```
2121-use repo_stream::{Driver, DriverBuilder, DiskBuilder};
2121+use repo_stream::{Driver, DriverBuilder, DiskBuilder, Step};
22222323# #[tokio::main]
2424# async fn main() -> Result<(), Box<dyn std::error::Error>> {
···3535{
36363737 // if all blocks fit within memory
3838- Driver::Memory(_commit, mut driver) => {
3939- while let Some(chunk) = driver.next_chunk(256).await? {
3838+ Driver::Memory(_commit, _prev_rkey, mut driver) => {
3939+ while let Step::Value(chunk) = driver.next_chunk(256).await? {
4040 for output in chunk {
4141 let size = usize::from_ne_bytes(output.data.try_into().unwrap());
4242···5050 // set up a disk store we can spill to
5151 let store = DiskBuilder::new().open("some/path.db".into()).await?;
5252 // do the spilling, get back a (similar) driver
5353- let (_commit, mut driver) = paused.finish_loading(store).await?;
5353+ let (_commit, _prev_rkey, mut driver) = paused.finish_loading(store).await?;
54545555- while let Some(chunk) = driver.next_chunk(256).await? {
5555+ while let Step::Value(chunk) = driver.next_chunk(256).await? {
5656 for output in chunk {
5757 let size = usize::from_ne_bytes(output.data.try_into().unwrap());
5858···8686pub use disk::{DiskBuilder, DiskError, DiskStore};
8787pub use drive::{DriveError, Driver, DriverBuilder, NeedDisk, noop};
8888pub use mst::Commit;
8989-pub use walk::Output;
8989+pub use walk::{Output, Step};
90909191pub type Bytes = Vec<u8>;
9292