···11[package]
22name = "repo-stream"
33-version = "0.3.1"
33+version = "0.4.0"
44edition = "2024"
55license = "MIT OR Apache-2.0"
66description = "Fast and robust atproto CAR file processing"
+7
changelog.md
···11+# v0.4.0
22+33+_2026-01-15_
44+55+- use `Output { rkey, cid, data }` instead of the `(rkey, data)` tuple so that the `Cid` is exposed, which makes tap-like diffing possible.
66+77+18# v0.3.1
29310_2026-01-15_
+2-2
examples/disk-read-file/main.rs
···8080 // keep a count of the total number of blocks seen
8181 n += pairs.len();
82828383- for (_, block) in pairs {
8383+ for output in pairs {
8484 // for each block, count how many bytes are equal to '0'
8585 // (this is just an example, you probably want to do something more
8686 // interesting)
8787- zeros += block.into_iter().filter(|&b| b == b'0').count()
8787+ zeros += output.data.into_iter().filter(|&b| b == b'0').count()
8888 }
8989 }
9090
+5-5
readme.md
···1111[sponsor-badge]: https://img.shields.io/badge/at-microcosm-b820f9?labelColor=b820f9&logo=githubsponsors&logoColor=fff
12121313```rust no_run
1414-use repo_stream::{Driver, DriverBuilder, DriveError, DiskBuilder};
1414+use repo_stream::{Driver, DriverBuilder, DriveError, DiskBuilder, Output};
15151616#[tokio::main]
1717async fn main() -> Result<(), Box<dyn std::error::Error>> {
···3333 // if all blocks fit within memory
3434 Driver::Memory(_commit, mut driver) => {
3535 while let Some(chunk) = driver.next_chunk(256).await? {
3636- for (_rkey, processed) in chunk {
3737- let size = usize::from_ne_bytes(processed.try_into().unwrap());
3636+ for Output { rkey: _, cid: _, data } in chunk {
3737+ let size = usize::from_ne_bytes(data.try_into().unwrap());
3838 total_size += size;
3939 }
4040 }
···4848 let (_commit, mut driver) = paused.finish_loading(store).await?;
49495050 while let Some(chunk) = driver.next_chunk(256).await? {
5151- for (_rkey, processed) in chunk {
5252- let size = usize::from_ne_bytes(processed.try_into().unwrap());
5151+ for Output { rkey: _, cid: _, data } in chunk {
5252+ let size = usize::from_ne_bytes(data.try_into().unwrap());
5353 total_size += size;
5454 }
5555 }
+12-14
src/drive.rs
···3535 JoinError(#[from] tokio::task::JoinError),
3636}
37373838-/// An in-order chunk of Rkey + (processed) Block pairs
3939-pub type BlockChunk = Vec<(String, Bytes)>;
3838+/// An in-order chunk of Rkey + CID + (processed) Block
3939+pub type BlockChunk = Vec<Output>;
40404141#[derive(Debug, Clone)]
4242pub(crate) enum MaybeProcessedBlock {
···272272 let mut out = Vec::with_capacity(n);
273273 for _ in 0..n {
274274 // walk as far as we can until we run out of blocks or find a record
275275- let Some(Output { rkey, cid: _, data }) =
276276- self.walker.step(&mut self.blocks, self.process)?
277277- else {
275275+ let Some(output) = self.walker.step(&mut self.blocks, self.process)? else {
278276 break;
279277 };
280280- out.push((rkey, data));
278278+ out.push(output);
281279 }
282280 if out.is_empty() {
283281 Ok(None)
···424422 /// # async fn main() -> Result<(), DriveError> {
425423 /// # let mut disk_driver = _get_fake_disk_driver();
426424 /// while let Some(pairs) = disk_driver.next_chunk(256).await? {
427427- /// for (rkey, record) in pairs {
428428- /// println!("{rkey}: size={}", record.len());
425425+ /// for output in pairs {
426426+ /// println!("{}: size={}", output.rkey, output.data.len());
429427 /// }
430428 /// }
431429 /// # Ok(())
···452450 return (state, Err(e.into()));
453451 }
454452 };
455455- let Some(Output { rkey, cid: _, data }) = step else {
453453+ let Some(output) = step else {
456454 break;
457455 };
458458- out.push((rkey, data));
456456+ out.push(output);
459457 }
460458461459 (state, Ok::<_, DriveError>(out))
···492490 Err(e) => return tx.blocking_send(Err(e.into())),
493491 };
494492495495- let Some(Output { rkey, cid: _, data }) = step else {
493493+ let Some(output) = step else {
496494 break;
497495 };
498498- out.push((rkey, data));
496496+ out.push(output);
499497 }
500498501499 if out.is_empty() {
···525523 /// let (mut rx, join) = disk_driver.to_channel(512);
526524 /// while let Some(recvd) = rx.recv().await {
527525 /// let pairs = recvd?;
528528- /// for (rkey, record) in pairs {
529529- /// println!("{rkey}: size={}", record.len());
526526+ /// for output in pairs {
527527+ /// println!("{}: size={}", output.rkey, output.data.len());
530528 /// }
531529 ///
532530 /// }
+5-4
src/lib.rs
···3737 // if all blocks fit within memory
3838 Driver::Memory(_commit, mut driver) => {
3939 while let Some(chunk) = driver.next_chunk(256).await? {
4040- for (_rkey, bytes) in chunk {
4141- let size = usize::from_ne_bytes(bytes.try_into().unwrap());
4040+ for output in chunk {
4141+ let size = usize::from_ne_bytes(output.data.try_into().unwrap());
42424343 total_size += size;
4444 }
···5353 let (_commit, mut driver) = paused.finish_loading(store).await?;
54545555 while let Some(chunk) = driver.next_chunk(256).await? {
5656- for (_rkey, bytes) in chunk {
5757- let size = usize::from_ne_bytes(bytes.try_into().unwrap());
5656+ for output in chunk {
5757+ let size = usize::from_ne_bytes(output.data.try_into().unwrap());
58585959 total_size += size;
6060 }
···8686pub use disk::{DiskBuilder, DiskError, DiskStore};
8787pub use drive::{DriveError, Driver, DriverBuilder, NeedDisk, noop};
8888pub use mst::Commit;
8989+pub use walk::Output;
89909091pub type Bytes = Vec<u8>;
9192
+3-2
tests/non-huge-cars.rs
···11extern crate repo_stream;
22use repo_stream::Driver;
33+use repo_stream::Output;
3445const EMPTY_CAR: &'static [u8] = include_bytes!("../car-samples/empty.car");
56const TINY_CAR: &'static [u8] = include_bytes!("../car-samples/tiny.car");
···3031 let mut prev_rkey = "".to_string();
31323233 while let Some(pairs) = driver.next_chunk(256).await.unwrap() {
3333- for (rkey, bytes) in pairs {
3434+ for Output { rkey, cid: _, data } in pairs {
3435 records += 1;
35363636- let (int_bytes, _) = bytes.split_at(size_of::<usize>());
3737+ let (int_bytes, _) = data.split_at(size_of::<usize>());
3738 let size = usize::from_ne_bytes(int_bytes.try_into().unwrap());
38393940 sum += size;