Our Personal Data Server from scratch! tranquil.farm
oauth atproto pds rust postgresql objectstorage fun

Sharded filesystem subdirs

authored by oyster.cafe and committed by tangled.org 2d10dc09 230d9026

+100 -1
+100 -1
crates/tranquil-storage/src/lib.rs
// Reconstructed from the collapsed diff view. `···` marks regions of the file
// that the diff does not show.

// ···

const MIN_PART_SIZE: usize = 5 * 1024 * 1024;
const EXDEV: i32 = 18;

/// Number of leading bytes of a CID-style key that become the shard directory name.
const CID_SHARD_PREFIX_LEN: usize = 9;

/// Splits a CID-style storage key into `(shard_dir, file_name)`.
///
/// Returns `Some` only when all of the following hold:
///
/// * the key starts with `baf` (compared ASCII case-insensitively),
/// * the key is strictly longer than [`CID_SHARD_PREFIX_LEN`] bytes, so the
///   file-name half is never empty,
/// * every byte of the key is an ASCII letter or digit.
///
/// Every other key yields `None`, and the caller is expected to fall back to
/// the unsharded layout.
///
/// The ASCII-alphanumeric requirement closes two holes in the plain
/// prefix/length check:
///
/// * `str::split_at` panics when the index is not on a UTF-8 character
///   boundary, which a key such as `"bafkreihé"` would trigger.
/// * A key such as `"bafkreihd/etc/passwd"` would split into
///   `("bafkreihd", "/etc/passwd")`. Joining that absolute remainder onto a
///   base path replaces the base instead of extending it.
///
/// NOTE(review): `validate_key` is not fully visible in this view, so this
/// function deliberately does not rely on it to reject such keys.
fn split_cid_path(key: &str) -> Option<(&str, &str)> {
    let has_cid_prefix = key
        .get(..3)
        .map_or(false, |prefix| prefix.eq_ignore_ascii_case("baf"));
    // All-ASCII input guarantees that every byte index is a char boundary,
    // and excludes path separators and dots from both halves.
    let is_plain_ascii = key.bytes().all(|b| b.is_ascii_alphanumeric());

    (key.len() > CID_SHARD_PREFIX_LEN && has_cid_prefix && is_plain_ascii)
        .then(|| key.split_at(CID_SHARD_PREFIX_LEN))
}

// fn validate_key(key: &str) -> Result<(), StorageError> {
//     let dominated_by_traversal = key
// ··· (the rest of `validate_key` and the lines that follow are not shown)
//
// Changed method; its enclosing `impl` header is not shown in the diff:
//
//       fn resolve_path(&self, key: &str) -> Result<PathBuf, StorageError> {
//           validate_key(key)?;
//   -       Ok(self.base_path.join(key))
//   +       Ok(split_cid_path(key).map_or_else(
//   +           || self.base_path.join(key),
//   +           |(dir, file)| self.base_path.join(dir).join(file),
//   +       ))
//       }
//
//       async fn atomic_write(&self, path: &Path, data: &[u8]) -> Result<(), StorageError> {
// ··· (the body of `atomic_write` and the lines that follow are not shown)
//   }
//
//   impl<T> Pipe for T {}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn split_path_from_raw_blob_cid() {
        let cid = "bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku";
        assert_eq!(
            split_cid_path(cid),
            Some((
                "bafkreihd",
                "wdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku"
            ))
        );
    }

    #[test]
    fn split_path_from_dag_cbor_cid() {
        let cid = "bafyreigdmqpykrgxyaxtlafqpqhzrb7qy2rh75nldvfd4tucqmqqme5yje";
        assert_eq!(
            split_cid_path(cid),
            Some((
                "bafyreigd",
                "mqpykrgxyaxtlafqpqhzrb7qy2rh75nldvfd4tucqmqqme5yje"
            ))
        );
    }

    #[test]
    fn no_split_for_temp_keys() {
        assert_eq!(split_cid_path("temp/abc123"), None);
    }

    #[test]
    fn no_split_for_short_keys() {
        assert_eq!(split_cid_path("bafkreihd"), None);
        assert_eq!(split_cid_path("bafkrei"), None);
        assert_eq!(split_cid_path("baf"), None);
        assert_eq!(split_cid_path("ba"), None);
        assert_eq!(split_cid_path(""), None);
    }

    #[test]
    fn no_split_for_non_cid_keys() {
        assert_eq!(split_cid_path("something/else/entirely"), None);
        assert_eq!(split_cid_path("Qmabcdefghijklmnop"), None);
    }

    #[test]
    fn split_cid_case_insensitive() {
        let upper = "BAFKREIHDWDCEFGH4DQKJV67UZCMW7OJEE6XEDZDETOJUZJEVTENXQUVYKU";
        let mixed = "BaFkReIhDwDcEfGh4DqKjV67UzCmW7OjEe6XeDzDeTojUzJevTeNxQuVyKu";
        assert_eq!(
            split_cid_path(upper),
            Some((
                "BAFKREIHD",
                "WDCEFGH4DQKJV67UZCMW7OJEE6XEDZDETOJUZJEVTENXQUVYKU"
            ))
        );
        assert_eq!(
            split_cid_path(mixed),
            Some((
                "BaFkReIhD",
                "wDcEfGh4DqKjV67UzCmW7OjEe6XeDzDeTojUzJevTeNxQuVyKu"
            ))
        );
    }

    #[test]
    fn split_at_minimum_length() {
        let cid = "bafkreihdx";
        assert_eq!(split_cid_path(cid), Some(("bafkreihd", "x")));
    }

    #[test]
    fn no_split_when_remainder_contains_separator() {
        // The remainder would start with `/` and replace the base path on join.
        assert_eq!(split_cid_path("bafkreihd/etc/passwd"), None);
        assert_eq!(split_cid_path("bafkreihdx/nested"), None);
        assert_eq!(split_cid_path("baf/temp/abc123"), None);
    }

    #[test]
    fn no_split_for_non_ascii_keys() {
        // `é` occupies bytes 8 and 9, so byte index 9 is not a char boundary.
        assert_eq!(split_cid_path("bafkreihé"), None);
        assert_eq!(split_cid_path("bafkreihdé"), None);
    }

    #[test]
    fn resolve_path_shards_cid_keys() {
        let base = PathBuf::from("/blobs");
        let cid = "bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku";

        let expected =
            PathBuf::from("/blobs/bafkreihd/wdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku");
        let result = split_cid_path(cid).map_or_else(
            || base.join(cid),
            |(dir, file)| base.join(dir).join(file),
        );
        assert_eq!(result, expected);
    }

    #[test]
    fn resolve_path_no_shard_for_temp() {
        let base = PathBuf::from("/blobs");
        let key = "temp/abc123";

        let expected = PathBuf::from("/blobs/temp/abc123");
        let result = split_cid_path(key).map_or_else(
            || base.join(key),
            |(dir, file)| base.join(dir).join(file),
        );
        assert_eq!(result, expected);
    }
}