Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

Add support for reverse ordering in link queries (Issue #1) #6

closed opened by seoul.systems targeting main from seoul.systems/microcosm-rs: order_query

The following adds support for reverse-chronological ordering when fetching back-links.

We add a non-required "reverse" query parameter that allows reversing the default order of returned back-links.

Supports both current storage backends (mem_store and rocks_store).

Labels

None yet.

Participants 2
AT URI
at://did:plc:53wellrw53o7sw4zlpfenvuh/sh.tangled.repo.pull/3majn54wt2l22
+368 -59
Diff #0
+3 -19
constellation/src/bin/main.rs
··· 26 26 #[arg(long)] 27 27 #[clap(default_value = "0.0.0.0:6789")] 28 28 bind: SocketAddr, 29 - /// optionally disable the metrics server 30 - #[arg(long)] 31 - #[clap(default_value_t = false)] 32 - collect_metrics: bool, 33 29 /// metrics server's listen address 34 30 #[arg(long)] 35 31 #[clap(default_value = "0.0.0.0:8765")] ··· 96 92 let bind = args.bind; 97 93 let metrics_bind = args.bind_metrics; 98 94 99 - let collect_metrics = args.collect_metrics; 100 95 let stay_alive = CancellationToken::new(); 101 96 102 97 match args.backend { ··· 107 102 stream, 108 103 bind, 109 104 metrics_bind, 110 - collect_metrics, 111 105 stay_alive, 112 106 ), 113 107 #[cfg(feature = "rocks")] ··· 142 136 stream, 143 137 bind, 144 138 metrics_bind, 145 - collect_metrics, 146 139 stay_alive, 147 140 ); 148 141 eprintln!("run finished: {r:?}"); ··· 154 147 } 155 148 } 156 149 157 - #[allow(clippy::too_many_lines)] 158 - #[allow(clippy::too_many_arguments)] 159 150 fn run( 160 151 mut storage: impl LinkStorage, 161 152 fixture: Option<PathBuf>, ··· 163 154 stream: String, 164 155 bind: SocketAddr, 165 156 metrics_bind: SocketAddr, 166 - collect_metrics: bool, 167 157 stay_alive: CancellationToken, 168 158 ) -> Result<()> { 169 159 ctrlc::set_handler({ ··· 208 198 .build() 209 199 .expect("axum startup") 210 200 .block_on(async { 211 - // Install metrics server only if requested 212 - if collect_metrics { 213 - install_metrics_server(metrics_bind)?; 214 - } 201 + install_metrics_server(metrics_bind)?; 215 202 serve(readable, bind, staying_alive).await 216 203 }) 217 204 .unwrap(); ··· 219 206 } 220 207 }); 221 208 222 - // only spawn monitoring thread if the metrics server is running 223 - if collect_metrics { 224 - s.spawn(move || { // monitor thread 209 + s.spawn(move || { // monitor thread 225 210 let stay_alive = stay_alive.clone(); 226 211 let check_alive = stay_alive.clone(); 227 212 ··· 273 258 } 274 259 } 275 260 stay_alive.drop_guard(); 276 - }); 277 - } 261 + }); 278 262 }); 279 263 280 264 println!("byeeee");
+14 -5
constellation/src/server/mod.rs
··· 25 25 26 26 use acceptable::{acceptable, ExtractAccept}; 27 27 28 - const DEFAULT_CURSOR_LIMIT: u64 = 100; 29 - const DEFAULT_CURSOR_LIMIT_MAX: u64 = 1000; 28 + const DEFAULT_CURSOR_LIMIT: u64 = 16; 29 + const DEFAULT_CURSOR_LIMIT_MAX: u64 = 100; 30 30 31 31 fn get_default_cursor_limit() -> u64 { 32 32 DEFAULT_CURSOR_LIMIT ··· 239 239 /// Set the max number of links to return per page of results 240 240 #[serde(default = "get_default_cursor_limit")] 241 241 limit: u64, 242 + /// Allow returning links in reverse order (default: false) 243 + #[serde(default)] 244 + reverse: bool, 242 245 } 243 246 #[derive(Serialize)] 244 247 struct OtherSubjectCount { ··· 301 304 collection, 302 305 &path, 303 306 &path_to_other, 307 + query.reverse, 304 308 limit, 305 309 cursor_key, 306 310 &filter_dids, ··· 409 413 /// Set the max number of links to return per page of results 410 414 #[serde(default = "get_default_cursor_limit")] 411 415 limit: u64, 412 - // TODO: allow reverse (er, forward) order as well 416 + /// Allow returning links in reverse order (default: false) 417 + #[serde(default)] 418 + reverse: bool, 413 419 } 414 420 #[derive(Template, Serialize)] 415 421 #[template(path = "get-backlinks.html.j2")] ··· 460 466 &query.subject, 461 467 collection, 462 468 &path, 469 + query.reverse, 463 470 limit, 464 471 until, 465 472 &filter_dids, ··· 508 515 from_dids: Option<String>, // comma separated: gross 509 516 #[serde(default = "get_default_cursor_limit")] 510 517 limit: u64, 511 - // TODO: allow reverse (er, forward) order as well 518 + /// Allow returning links in reverse order (default: false) 519 + #[serde(default)] 520 + reverse: bool, 512 521 } 513 522 #[derive(Template, Serialize)] 514 523 #[template(path = "links.html.j2")] ··· 562 571 &query.target, 563 572 &query.collection, 564 573 &query.path, 574 + query.reverse, 565 575 limit, 566 576 until, 567 577 &filter_dids, ··· 594 604 path: String, 595 605 cursor: Option<OpaqueApiCursor>, 596 606 limit: Option<u64>, 597 - // TODO: allow reverse (er, forward) order as well 598 607 } 599 608 #[derive(Template, Serialize)] 600 609 #[template(path = "dids.html.j2")]
+43 -11
constellation/src/storage/mem_store.rs
··· 140 140 collection: &str, 141 141 path: &str, 142 142 path_to_other: &str, 143 + reverse: bool, 143 144 limit: u64, 144 145 after: Option<String>, 145 146 filter_dids: &HashSet<Did>, ··· 157 158 let filter_to_targets: HashSet<Target> = 158 159 HashSet::from_iter(filter_to_targets.iter().map(|s| Target::new(s))); 159 160 160 - let mut grouped_counts: HashMap<Target, (u64, HashSet<Did>)> = HashMap::new(); 161 - for (did, rkey) in linkers.iter().flatten().cloned() { 161 + // the last type field here acts as an index to allow keeping track of the order in which 162 + // we encountred single elements 163 + let mut grouped_counts: HashMap<Target, (u64, HashSet<Did>, usize)> = HashMap::new(); 164 + for (idx, (did, rkey)) in linkers.iter().flatten().cloned().enumerate() { 162 165 if !filter_dids.is_empty() && !filter_dids.contains(&did) { 163 166 continue; 164 167 } ··· 184 187 .take(1) 185 188 .next() 186 189 { 187 - let e = grouped_counts.entry(fwd_target.clone()).or_default(); 190 + let e = 191 + grouped_counts 192 + .entry(fwd_target.clone()) 193 + .or_insert((0, HashSet::new(), idx)); 188 194 e.0 += 1; 189 195 e.1.insert(did.clone()); 190 196 } 191 197 } 192 198 let mut items: Vec<(String, u64, u64)> = grouped_counts 193 199 .iter() 194 - .map(|(k, (n, u))| (k.0.clone(), *n, u.len() as u64)) 200 + .map(|(k, (n, u, _))| (k.0.clone(), *n, u.len() as u64)) 195 201 .collect(); 196 - items.sort(); 202 + // sort in reverse order to show entries from oldest to newest 203 + if reverse { 204 + items.sort_by(|a, b| b.cmp(a)); 205 + } else { 206 + items.sort(); 207 + } 197 208 items = items 198 209 .into_iter() 199 210 .skip_while(|(t, _, _)| after.as_ref().map(|a| t <= a).unwrap_or(false)) ··· 239 250 target: &str, 240 251 collection: &str, 241 252 path: &str, 253 + reverse: bool, 242 254 limit: u64, 243 255 until: Option<u64>, 244 256 filter_dids: &HashSet<Did>, ··· 276 288 }; 277 289 278 290 let total = did_rkeys.len(); 279 - let end = until 280 - .map(|u| std::cmp::min(u as usize, total)) 281 - .unwrap_or(total); 282 - let begin = end.saturating_sub(limit as usize); 283 - let next = if begin == 0 { None } else { Some(begin as u64) }; 291 + 292 + let begin: usize; 293 + let end: usize; 294 + let next: Option<u64>; 295 + 296 + if reverse { 297 + begin = until.map(|u| (u) as usize).unwrap_or(0); 298 + end = std::cmp::min(begin + limit as usize, total); 299 + 300 + next = if end < total { 301 + Some(end as u64 + 1) 302 + } else { 303 + None 304 + }; 305 + } else { 306 + end = until 307 + .map(|u| std::cmp::min(u as usize, total)) 308 + .unwrap_or(total); 309 + begin = end.saturating_sub(limit as usize); 310 + next = if begin == 0 { None } else { Some(begin as u64) }; 311 + } 284 312 285 313 let alive = did_rkeys.iter().flatten().count(); 286 314 let gone = total - alive; 287 315 288 - let items: Vec<_> = did_rkeys[begin..end] 316 + let mut items: Vec<_> = did_rkeys[begin..end] 289 317 .iter() 290 318 .rev() 291 319 .flatten() ··· 296 324 collection: collection.to_string(), 297 325 }) 298 326 .collect(); 327 + 328 + if reverse { 329 + items.reverse(); 330 + } 299 331 300 332 Ok(PagedAppendingCollection { 301 333 version: (total as u64, gone as u64),
+247 -10
constellation/src/storage/mod.rs
··· 72 72 collection: &str, 73 73 path: &str, 74 74 path_to_other: &str, 75 + reverse: bool, 75 76 limit: u64, 76 77 after: Option<String>, 77 78 filter_dids: &HashSet<Did>, ··· 87 88 target: &str, 88 89 collection: &str, 89 90 path: &str, 91 + reverse: bool, 90 92 limit: u64, 91 93 until: Option<u64>, 92 94 filter_dids: &HashSet<Did>, ··· 180 182 "a.com", 181 183 "app.t.c", 182 184 ".abc.uri", 185 + false, 183 186 100, 184 187 None, 185 188 &HashSet::default() ··· 683 686 "a.com", 684 687 "app.t.c", 685 688 ".abc.uri", 689 + false, 686 690 100, 687 691 None, 688 692 &HashSet::default() ··· 727 731 0, 728 732 )?; 729 733 } 730 - let links = 731 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 734 + let links = storage.get_links( 735 + "a.com", 736 + "app.t.c", 737 + ".abc.uri", 738 + false, 739 + 2, 740 + None, 741 + &HashSet::default(), 742 + )?; 732 743 let dids = storage.get_distinct_dids("a.com", "app.t.c", ".abc.uri", 2, None)?; 733 744 assert_eq!( 734 745 links, ··· 763 774 "a.com", 764 775 "app.t.c", 765 776 ".abc.uri", 777 + false, 766 778 2, 767 779 links.next, 768 780 &HashSet::default(), ··· 801 813 "a.com", 802 814 "app.t.c", 803 815 ".abc.uri", 816 + false, 804 817 2, 805 818 links.next, 806 819 &HashSet::default(), ··· 831 844 assert_stats(storage.get_stats()?, 5..=5, 1..=1, 5..=5); 832 845 }); 833 846 847 + test_each_storage!(get_links_reverse_order, |storage| { 848 + for i in 1..=5 { 849 + storage.push( 850 + &ActionableEvent::CreateLinks { 851 + record_id: RecordId { 852 + did: format!("did:plc:asdf-{i}").into(), 853 + collection: "app.t.c".into(), 854 + rkey: "asdf".into(), 855 + }, 856 + links: vec![CollectedLink { 857 + target: Link::Uri("a.com".into()), 858 + path: ".abc.uri".into(), 859 + }], 860 + }, 861 + 0, 862 + )?; 863 + } 864 + 865 + // Test reverse: true (oldest first) 866 + let links = storage.get_links( 867 + "a.com", 868 + "app.t.c", 869 + ".abc.uri", 870 + true, 871 + 2, 872 + None, 873 + &HashSet::default(), 874 + )?; 875 + assert_eq!( 876 + links, 877 + PagedAppendingCollection { 878 + version: (5, 0), 879 + items: vec![ 880 + RecordId { 881 + did: "did:plc:asdf-1".into(), 882 + collection: "app.t.c".into(), 883 + rkey: "asdf".into(), 884 + }, 885 + RecordId { 886 + did: "did:plc:asdf-2".into(), 887 + collection: "app.t.c".into(), 888 + rkey: "asdf".into(), 889 + }, 890 + ], 891 + next: Some(3), 892 + total: 5, 893 + } 894 + ); 895 + // Test reverse: false (newest first) 896 + let links = storage.get_links( 897 + "a.com", 898 + "app.t.c", 899 + ".abc.uri", 900 + false, 901 + 2, 902 + None, 903 + &HashSet::default(), 904 + )?; 905 + assert_eq!( 906 + links, 907 + PagedAppendingCollection { 908 + version: (5, 0), 909 + items: vec![ 910 + RecordId { 911 + did: "did:plc:asdf-5".into(), 912 + collection: "app.t.c".into(), 913 + rkey: "asdf".into(), 914 + }, 915 + RecordId { 916 + did: "did:plc:asdf-4".into(), 917 + collection: "app.t.c".into(), 918 + rkey: "asdf".into(), 919 + }, 920 + ], 921 + next: Some(3), 922 + total: 5, 923 + } 924 + ); 925 + assert_stats(storage.get_stats()?, 5..=5, 1..=1, 5..=5); 926 + }); 927 + 834 928 test_each_storage!(get_filtered_links, |storage| { 835 929 let links = storage.get_links( 836 930 "a.com", 837 931 "app.t.c", 838 932 ".abc.uri", 933 + false, 839 934 2, 840 935 None, 841 936 &HashSet::from([Did("did:plc:linker".to_string())]), ··· 869 964 "a.com", 870 965 "app.t.c", 871 966 ".abc.uri", 967 + false, 872 968 2, 873 969 None, 874 970 &HashSet::from([Did("did:plc:linker".to_string())]), ··· 891 987 "a.com", 892 988 "app.t.c", 893 989 ".abc.uri", 990 + false, 894 991 2, 895 992 None, 896 993 &HashSet::from([Did("did:plc:someone-else".to_string())]), ··· 938 1035 "a.com", 939 1036 "app.t.c", 940 1037 ".abc.uri", 1038 + false, 941 1039 2, 942 1040 None, 943 1041 &HashSet::from([Did("did:plc:linker".to_string())]), ··· 967 1065 "a.com", 968 1066 "app.t.c", 969 1067 ".abc.uri", 1068 + false, 970 1069 2, 971 1070 None, 972 1071 &HashSet::from([ ··· 999 1098 "a.com", 1000 1099 "app.t.c", 1001 1100 ".abc.uri", 1101 + false, 1002 1102 2, 1003 1103 None, 1004 1104 &HashSet::from([Did("did:plc:someone-unknown".to_string())]), ··· 1031 1131 0, 1032 1132 )?; 1033 1133 } 1034 - let links = 1035 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 1134 + let links = storage.get_links( 1135 + "a.com", 1136 + "app.t.c", 1137 + ".abc.uri", 1138 + false, 1139 + 2, 1140 + None, 1141 + &HashSet::default(), 1142 + )?; 1036 1143 assert_eq!( 1037 1144 links, 1038 1145 PagedAppendingCollection { ··· 1057 1164 "a.com", 1058 1165 "app.t.c", 1059 1166 ".abc.uri", 1167 + false, 1060 1168 2, 1061 1169 links.next, 1062 1170 &HashSet::default(), ··· 1101 1209 0, 1102 1210 )?; 1103 1211 } 1104 - let links = 1105 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 1212 + let links = storage.get_links( 1213 + "a.com", 1214 + "app.t.c", 1215 + ".abc.uri", 1216 + false, 1217 + 2, 1218 + None, 1219 + &HashSet::default(), 1220 + )?; 1106 1221 assert_eq!( 1107 1222 links, 1108 1223 PagedAppendingCollection { ··· 1141 1256 "a.com", 1142 1257 "app.t.c", 1143 1258 ".abc.uri", 1259 + false, 1144 1260 2, 1145 1261 links.next, 1146 1262 &HashSet::default(), ··· 1185 1301 0, 1186 1302 )?; 1187 1303 } 1188 - let links = 1189 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 1304 + let links = storage.get_links( 1305 + "a.com", 1306 + "app.t.c", 1307 + ".abc.uri", 1308 + false, 1309 + 2, 1310 + None, 1311 + &HashSet::default(), 1312 + )?; 1190 1313 assert_eq!( 1191 1314 links, 1192 1315 PagedAppendingCollection { ··· 1219 1342 "a.com", 1220 1343 "app.t.c", 1221 1344 ".abc.uri", 1345 + false, 1222 1346 2, 1223 1347 links.next, 1224 1348 &HashSet::default(), ··· 1256 1380 0, 1257 1381 )?; 1258 1382 } 1259 - let links = 1260 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 1383 + let links = storage.get_links( 1384 + "a.com", 1385 + "app.t.c", 1386 + ".abc.uri", 1387 + false, 1388 + 2, 1389 + None, 1390 + &HashSet::default(), 1391 + )?; 1261 1392 assert_eq!( 1262 1393 links, 1263 1394 PagedAppendingCollection { ··· 1286 1417 "a.com", 1287 1418 "app.t.c", 1288 1419 ".abc.uri", 1420 + false, 1289 1421 2, 1290 1422 links.next, 1291 1423 &HashSet::default(), ··· 1367 1499 "a.b.c", 1368 1500 ".d.e", 1369 1501 ".f.g", 1502 + false, 1370 1503 10, 1371 1504 None, 1372 1505 &HashSet::new(), ··· 1410 1543 "app.t.c", 1411 1544 ".abc.uri", 1412 1545 ".def.uri", 1546 + false, 1413 1547 10, 1414 1548 None, 1415 1549 &HashSet::new(), ··· 1509 1643 "app.t.c", 1510 1644 ".abc.uri", 1511 1645 ".def.uri", 1646 + false, 1512 1647 10, 1513 1648 None, 1514 1649 &HashSet::new(), ··· 1525 1660 "app.t.c", 1526 1661 ".abc.uri", 1527 1662 ".def.uri", 1663 + false, 1528 1664 10, 1529 1665 None, 1530 1666 &HashSet::from_iter([Did("did:plc:fdsa".to_string())]), ··· 1541 1677 "app.t.c", 1542 1678 ".abc.uri", 1543 1679 ".def.uri", 1680 + false, 1544 1681 10, 1545 1682 None, 1546 1683 &HashSet::new(), ··· 1551 1688 next: None, 1552 1689 } 1553 1690 ); 1691 + }); 1692 + 1693 + test_each_storage!(get_m2m_counts_reverse_order, |storage| { 1694 + // Create links from different DIDs to different targets 1695 + storage.push( 1696 + &ActionableEvent::CreateLinks { 1697 + record_id: RecordId { 1698 + did: "did:plc:user1".into(), 1699 + collection: "app.t.c".into(), 1700 + rkey: "post1".into(), 1701 + }, 1702 + links: vec![ 1703 + CollectedLink { 1704 + target: Link::Uri("a.com".into()), 1705 + path: ".abc.uri".into(), 1706 + }, 1707 + CollectedLink { 1708 + target: Link::Uri("b.com".into()), 1709 + path: ".def.uri".into(), 1710 + }, 1711 + ], 1712 + }, 1713 + 0, 1714 + )?; 1715 + storage.push( 1716 + &ActionableEvent::CreateLinks { 1717 + record_id: RecordId { 1718 + did: "did:plc:user2".into(), 1719 + collection: "app.t.c".into(), 1720 + rkey: "post1".into(), 1721 + }, 1722 + links: vec![ 1723 + CollectedLink { 1724 + target: Link::Uri("a.com".into()), 1725 + path: ".abc.uri".into(), 1726 + }, 1727 + CollectedLink { 1728 + target: Link::Uri("c.com".into()), 1729 + path: ".def.uri".into(), 1730 + }, 1731 + ], 1732 + }, 1733 + 1, 1734 + )?; 1735 + storage.push( 1736 + &ActionableEvent::CreateLinks { 1737 + record_id: RecordId { 1738 + did: "did:plc:user3".into(), 1739 + collection: "app.t.c".into(), 1740 + rkey: "post1".into(), 1741 + }, 1742 + links: vec![ 1743 + CollectedLink { 1744 + target: Link::Uri("a.com".into()), 1745 + path: ".abc.uri".into(), 1746 + }, 1747 + CollectedLink { 1748 + target: Link::Uri("d.com".into()), 1749 + path: ".def.uri".into(), 1750 + }, 1751 + ], 1752 + }, 1753 + 2, 1754 + )?; 1755 + 1756 + // Test reverse: false (default order - by target ascending) 1757 + let counts = storage.get_many_to_many_counts( 1758 + "a.com", 1759 + "app.t.c", 1760 + ".abc.uri", 1761 + ".def.uri", 1762 + false, 1763 + 10, 1764 + None, 1765 + &HashSet::new(), 1766 + &HashSet::new(), 1767 + )?; 1768 + assert_eq!(counts.items.len(), 3); 1769 + // Should be sorted by target in ascending order (alphabetical) 1770 + assert_eq!(counts.items[0].0, "b.com"); 1771 + assert_eq!(counts.items[1].0, "c.com"); 1772 + assert_eq!(counts.items[2].0, "d.com"); 1773 + 1774 + // Test reverse: true (descending order - by target descending) 1775 + let counts = storage.get_many_to_many_counts( 1776 + "a.com", 1777 + "app.t.c", 1778 + ".abc.uri", 1779 + ".def.uri", 1780 + true, 1781 + 10, 1782 + None, 1783 + &HashSet::new(), 1784 + &HashSet::new(), 1785 + )?; 1786 + assert_eq!(counts.items.len(), 3); 1787 + // Should be sorted by target in descending order (reverse alphabetical) 1788 + assert_eq!(counts.items[0].0, "d.com"); 1789 + assert_eq!(counts.items[1].0, "c.com"); 1790 + assert_eq!(counts.items[2].0, "b.com"); 1554 1791 }); 1555 1792 }
+34 -4
constellation/src/storage/rocks_store.rs
··· 941 941 collection: &str, 942 942 path: &str, 943 943 path_to_other: &str, 944 + reverse: bool, 944 945 limit: u64, 945 946 after: Option<String>, 946 947 filter_dids: &HashSet<Did>, ··· 1071 1072 } 1072 1073 1073 1074 let mut items: Vec<(String, u64, u64)> = Vec::with_capacity(grouped_counts.len()); 1075 + 1074 1076 for (target_id, (n, dids)) in &grouped_counts { 1075 1077 let Some(target) = self 1076 1078 .target_id_table ··· 1080 1082 continue; 1081 1083 }; 1082 1084 items.push((target.0 .0, *n, dids.len() as u64)); 1085 + } 1086 + 1087 + // Sort in desired direction 1088 + if reverse { 1089 + items.sort_by(|a, b| b.cmp(a)); // descending 1090 + } else { 1091 + items.sort(); // ascending 1083 1092 } 1084 1093 1085 1094 let next = if grouped_counts.len() as u64 >= limit { ··· 1127 1136 target: &str, 1128 1137 collection: &str, 1129 1138 path: &str, 1139 + reverse: bool, 1130 1140 limit: u64, 1131 1141 until: Option<u64>, 1132 1142 filter_dids: &HashSet<Did>, ··· 1167 1177 1168 1178 let (alive, gone) = linkers.count(); 1169 1179 let total = alive + gone; 1170 - let end = until.map(|u| std::cmp::min(u, total)).unwrap_or(total) as usize; 1171 - let begin = end.saturating_sub(limit as usize); 1172 - let next = if begin == 0 { None } else { Some(begin as u64) }; 1180 + 1181 + let end: usize; 1182 + let begin: usize; 1183 + let next: Option<u64>; 1173 1184 1174 - let did_id_rkeys = linkers.0[begin..end].iter().rev().collect::<Vec<_>>(); 1185 + if reverse { 1186 + begin = until.map(|u| (u - 1) as usize).unwrap_or(0); 1187 + end = std::cmp::min(begin + limit as usize, total as usize); 1188 + 1189 + next = if end < total as usize { 1190 + Some(end as u64 + 1) 1191 + } else { 1192 + None 1193 + } 1194 + } else { 1195 + end = until.map(|u| std::cmp::min(u, total)).unwrap_or(total) as usize; 1196 + begin = end.saturating_sub(limit as usize); 1197 + next = if begin == 0 { None } else { Some(begin as u64) }; 1198 + } 1199 + 1200 + let mut did_id_rkeys = linkers.0[begin..end].iter().rev().collect::<Vec<_>>(); 1201 + 1202 + if reverse { 1203 + did_id_rkeys.reverse(); 1204 + } 1175 1205 1176 1206 let mut items = Vec::with_capacity(did_id_rkeys.len()); 1177 1207 // TODO: use get-many (or multi-get or whatever it's called)
+4
constellation/templates/base.html.j2
··· 40 40 padding: 0.5em 0.3em; 41 41 max-width: 100%; 42 42 } 43 + pre.code input { 44 + margin: 0; 45 + padding: 0; 46 + } 43 47 .stat { 44 48 color: #f90; 45 49 font-size: 1.618rem;
+2 -1
constellation/templates/get-backlinks.html.j2
··· 6 6 7 7 {% block content %} 8 8 9 - {% call try_it::get_backlinks(query.subject, query.source, query.did, query.limit) %} 9 + {% call try_it::get_backlinks(query.subject, query.source, query.did, query.limit, query.reverse) %} 10 10 11 11 <h2> 12 12 Links to <code>{{ query.subject }}</code> ··· 40 40 <input type="hidden" name="did" value="{{ did }}" /> 41 41 {% endfor %} 42 42 <input type="hidden" name="cursor" value={{ c|json|safe }} /> 43 + <input type="hidden" name="reverse" value="{{ query.reverse }}"> 43 44 <button type="submit">next page&hellip;</button> 44 45 </form> 45 46 {% else %}
+2
constellation/templates/get-many-to-many-counts.html.j2
··· 13 13 query.did, 14 14 query.other_subject, 15 15 query.limit, 16 + query.reverse, 16 17 ) %} 17 18 18 19 <h2> ··· 53 54 {% endfor %} 54 55 <input type="hidden" name="limit" value="{{ query.limit }}" /> 55 56 <input type="hidden" name="cursor" value={{ c|json|safe }} /> 57 + <input type="hidden" name="reverse" value="{{ query.reverse }}"> 56 58 <button type="submit">next page&hellip;</button> 57 59 </form> 58 60 {% else %}
+7 -2
constellation/templates/hello.html.j2
··· 49 49 <li><p><code>source</code>: required. Example: <code>app.bsky.feed.like:subject.uri</code></p></li> 50 50 <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 51 51 <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li> 52 + <li><p><code>reverse</code>: optional, return links in reverse order. Default: <code>false</code></p></li> 52 53 </ul> 53 54 54 55 <p style="margin-bottom: 0"><strong>Try it:</strong></p> 55 - {% call try_it::get_backlinks("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like:subject.uri", [""], 16) %} 56 + {% call 57 + try_it::get_backlinks("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like:subject.uri", [""], 16, false) %} 56 58 57 59 58 60 <h3 class="route"><code>GET /xrpc/blue.microcosm.links.getManyToManyCounts</code></h3> ··· 68 70 <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 69 71 <li><p><code>otherSubject</code>: optional, filter secondary links to specific subjects. Include multiple times to filter by multiple users. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li> 70 72 <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li> 73 + <li><p><code>reverse</code>: optional, return links in reverse order. Default: <code>false</code></p></li> 71 74 </ul> 72 75 73 76 <p style="margin-bottom: 0"><strong>Try it:</strong></p> ··· 78 81 [""], 79 82 [""], 80 83 25, 84 + false, 81 85 ) %} 82 86 83 87 ··· 96 100 <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 97 101 <li><p><code>from_dids</code> [deprecated]: optional. Use <code>did</code> instead. Example: <code>from_dids=did:plc:vc7f4oafdgxsihk4cry2xpze,did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 98 102 <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li> 103 + <li><p><code>reverse</code>: optional, return links in reverse order. Default: <code>false</code></p></li> 99 104 </ul> 100 105 101 106 <p style="margin-bottom: 0"><strong>Try it:</strong></p> 102 - {% call try_it::links("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like", ".subject.uri", [""], 16) %} 107 + {% call try_it::links("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like", ".subject.uri", [""], 16, false) %} 103 108 104 109 105 110 <h3 class="route"><code>GET /links/distinct-dids</code></h3>
+2 -1
constellation/templates/links.html.j2
··· 6 6 7 7 {% block content %} 8 8 9 - {% call try_it::links(query.target, query.collection, query.path, query.did, query.limit) %} 9 + {% call try_it::links(query.target, query.collection, query.path, query.did, query.limit, query.reverse) %} 10 10 11 11 <h2> 12 12 Links to <code>{{ query.target }}</code> ··· 37 37 <input type="hidden" name="collection" value="{{ query.collection }}" /> 38 38 <input type="hidden" name="path" value="{{ query.path }}" /> 39 39 <input type="hidden" name="cursor" value={{ c|json|safe }} /> 40 + <input type="hidden" name="reverse" value="{{ query.reverse }}"> 40 41 <button type="submit">next page&hellip;</button> 41 42 </form> 42 43 {% else %}
+10 -6
constellation/templates/try-it-macros.html.j2
··· 1 - {% macro get_backlinks(subject, source, dids, limit) %} 1 + {% macro get_backlinks(subject, source, dids, limit, reverse) %} 2 2 <form method="get" action="/xrpc/blue.microcosm.links.getBacklinks"> 3 3 <pre class="code"><strong>GET</strong> /xrpc/blue.microcosm.links.getBacklinks 4 4 ?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." /> ··· 6 6 {%- for did in dids %}{% if !did.is_empty() %} 7 7 &did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %} 8 8 <span id="did-placeholder"></span> <button id="add-did">+ did filter</button> 9 - &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre> 9 + &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> 10 + &reverse= <input type="checkbox" name="reverse" value="true" checked="false"><button type="submit">get links</button></pre> 10 11 </form> 11 12 <script> 12 13 const addDidButton = document.getElementById('add-did'); ··· 24 25 </script> 25 26 {% endmacro %} 26 27 27 - {% macro get_many_to_many_counts(subject, source, pathToOther, dids, otherSubjects, limit) %} 28 + {% macro get_many_to_many_counts(subject, source, pathToOther, dids, otherSubjects, limit, reverse) %} 28 29 <form method="get" action="/xrpc/blue.microcosm.links.getManyToManyCounts"> 29 30 <pre class="code"><strong>GET</strong> /xrpc/blue.microcosm.links.getManyToManyCounts 30 31 ?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." /> ··· 36 37 {%- for otherSubject in otherSubjects %}{% if !otherSubject.is_empty() %} 37 38 &otherSubject= <input type="text" name="did" value="{{ otherSubject }}" placeholder="at-uri, did, uri..." />{% endif %}{% endfor %} 38 39 <span id="m2m-did-placeholder"></span> <button id="m2m-add-did">+ did filter</button> 39 - &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre> 40 + &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> 41 + &reverse= <input type="checkbox" name="reverse" value="true" checked="false"><button type="submit">get links</button></pre> 40 42 </form> 41 43 <script> 42 44 const m2mAddDidButton = document.getElementById('m2m-add-did'); ··· 66 68 </script> 67 69 {% endmacro %} 68 70 69 - {% macro links(target, collection, path, dids, limit) %} 71 + {% macro links(target, collection, path, dids, limit, reverse) %} 70 72 <form method="get" action="/links"> 71 73 <pre class="code"><strong>GET</strong> /links 72 74 ?target= <input type="text" name="target" value="{{ target }}" placeholder="target" /> ··· 75 77 {%- for did in dids %}{% if !did.is_empty() %} 76 78 &did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %} 77 79 <span id="did-placeholder"></span> <button id="add-did">+ did filter</button> 78 - &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre> 80 + &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> 81 + &reverse= <input type="checkbox" name="reverse" value="true" checked="false"> 82 + <button type="submit">get links</button></pre> 79 83 </form> 80 84 <script> 81 85 const addDidButton = document.getElementById('add-did');

History

2 rounds 23 comments
sign up or login to add to the discussion
6 commits
expand
Add reverse ordering support to link query endpoints
Fix failing tests
Format tests
Add tests for reverse ordering in link queries
Fix pagination logic for reverse-ordered link queries
Replace boolean reverse parameter with Order enum
expand 16 comments

Damn, I just realized I have not yet added a lexicon for the new endpoint. Will do that now. At least the endpoint logic itself should not be affected by that and thus be ready for review, given its similarity to the existing getManyToManyCounts endpoint.

As Lexicons do not offer support for tuples as primitive field types I had to convert the return to use a new RecordsBySubject struct instead, which closely matches what you already did for the m2m counts endpoint.

The comments obviously don't belong here... please refer to the comments made for the PR #7

alright! reading back my own code gave me such a headache with this!

i'm reverting a few parts of this:

  • deprecated links endpoint: going to hard-code it to the existing order (deprecated endpoints don't get new features)

  • reverse ordering on many-to-many counts.

the many-to-many counts endpoint is a weird one to think about because it's not obvious what an "order" on it means. especially since the counts are aggregated, which row should be first in the response?

the current behaviour is deterministic but arbitrary: items are sorted by the other subject's target id (ie., the first time we saw the joined thingy), which is potentially unrelated to the actual many-to-many records themselves. if someone later creates a many-to-many record linking an earlier other subject, it'll become first in the list.

so: the determinism of that other-subject-target-id sorting is what enables paging to work at this endpoint, but it doesn't impose a client-meaningful order of items in the response. reversing an arbitrary order just gives you a different arbitrary order.

hopefully that makes sense ha

(additionally: there is a bit of optimization in the main join loop in the rocksdb implementation that assumes target-id-order to save work from unused pages, and i could be wrong, but i think it wouldn't stay valid under reverse paging)

(oh but i do think we can add a reverse option to the distinct-dids endpoint! probably under a new xrpc version.)

Just so that I understand you correctly here: We decided to not proceed with the implementation of a reverse order parameter for many-to-many related things as the current order is deterministic but arbitrary in nature, and thus reversing is essentially not providing any more information that is meaningful in the end.

If it's not too much trouble, do you mind explaining why you used Iterator::skip and ::take for the slicing? Is this simply more idiomatic or more performant even? :)

Really liked the way you resolved empty results with the empty impl by the way. Definitely less verbose and even clearer than what I opted for haha

many-to-many: yes exactly. it's not clear what expectation we would be trying to meet for a client requesting the many-to-many counts in reverse.

i'm open to being wrong here! when thinking about ordering, i was focused on the order of the many-to-many join-records themselves. which is where things felt meaningless/arbitrary because a) they're aggregated together (at this endpoint) and b) their order of creation is independent of the order this endpoint returns its results in.

but maybe having results ordered by the other-target-id does have client-relevant meaning. joined subjects are sorted by their age in a global sense?

thinking through it with concrete examples, i'm more inclined to bring it back:

  • joining tangled issues to a label: issues are ordered oldest-to-newest (independent from when the label was added)
  • joining bluesky users to a bluesky list: users are ordered by the age of their account (again independent from when they were added to the list)

hmm.

the thing that still feels off to me for these is the fact that this endpoint is an aggregate of many-to-many counts. that implies to me that the count is the thing of interest, and the order of responses in relation to the count is arbitrary again.

trying to keep the non-aggregated version of this endpoint in view, does ordering by the linked subject age make sense then? (ordering by the link actually feels more natural to me in that case) (i think i just talked myself out of and then back into bringing back your ordering here!!)

skip and take: yeah, it's a little more functional-style which works well for my brain. i like a big pipeline of chained transformations operating on sequences so i took the chance to do a little refactor with it. especially since it's immediately setting up an iterator right after slicing, it feels more cohesive to me.

it can be technically the other things too:

indexing ranges like thing[start..end] can panic if either index is out of bounds. in some cases the compiler can prove we already did the bounds checks and remove the panic hooks, but i suspect it can't here. even though we can prove that the indices are in range by reasoning about the code, it's nice not to have things-that-can-panic in the code.

i think it eliminated a Vec::reverse() in favour of an Iterator::rev(), which is potentially a very tiny efficiency improvement

m2m: I think this just boils down to how we communicate this fact to users of our API imo. As your internal back and forth shows there can be valid arguments in both ways. I think most users might naively - as I did at first - simply expect the links/records to be returned in the order they were created in. But, clearly documenting this in the endpoint description seems like a good idea here to make sure we don't leave any kind of ambiguity on the table.

skip and take: That's actually a great argument and I tend to agree with you on both fronts. Having worked professionally more with Typescript than, say, C, I always liked that you could chain transformations to a specific object together (map, filter and so on). The additional safety we get here from using iterators to slice data is new for me but definitely worth it.

m2m (again): Depending on how you see this I might reopen the PR again and introduce the parameter again. I let you make the final judgement call ofc :)

thanks for following up! i really appreciate the discussion!

100% agree the most important thing is communicating and setting accurate expectations in the api docs regardless of how this lands.

I really had convinced myself that the order was "meaningless" for clients, but then every example i was thinking of was like, welllll it kind of is actually meaningful. After a few days I'm still feeling that way, so I'm interested in bringing this back.

(the other thing that got me was switching to the other M2M PR, and remembering that this endpoint is useful as a "distinct" version of that query. it's not only useful for its counts)

i do think there's some extra logic needed in one of the loops that currently tries to avoid some work based on cursor assumptions.

aaaaaand i think there might be a bug in the existing logic, so this will be extra fun sorry! (i will see if i can finally get that to a proper repro or disprove it, so we can hopefully fix that first)

Sounds great! I think no matter how you look at it, the order is not entirely arbitrary as we collect results on the order they're stored.

Do you mind elaborating a bit further on your last two remarks before I open the PR again?! Might be easier for me to know what to look for then!

closed without merging
seoul.systems submitted #0
5 commits
expand
Add reverse ordering support to link query endpoints
Fix failing tests
Format tests
Add tests for reverse ordering in link queries
Fix pagination logic for reverse-ordered link queries
expand 7 comments

Looks like we might need a rebase, seems like this picked up (and is trying to undo) the changes from your other two PRs

✅ naming the query param reverse sounds good to me since it's consistent with com.atproto.repo.listRecords (and i assume/hopefully most other atproto lexicons)

i think we double-reverse for reverse-order backlinks:

https://tangled.org/microcosm.blue/microcosm-rs/pulls/6/round/0#constellation%2fsrc%2fstorage%2frocks_store.rs-N1200

could probably work it so we only call reverse once (when "forward". not confusing at all.)

Looks like we might need a rebase, seems like this picked up (and is trying to undo) the changes from your other two PRs

I think merging upstream into the fork might work as well and makes things easier for you as the reviewer. I don't mind rebasing ofc.

i think we double-reverse for reverse-order backlinks:

Yeah the existing .rev() calls were a bit confusing at first. Maybe changing the default order to return the oldest links per default might make things a bit clearer? Though I think this is mostly helping us as the maintainers as users might expect to get served the newest ones most of the time.

Another option could be introducing an "order" query parameter with two possible values asc and desc (or something similar) to make things a bit more clear what the default order is, but then we would lose the consistency between our endpoint and com.atproto.repo.listRecords.

We could reduce the number of times we reverse using the following I think:

let mut did_id_rkeys = linkers.0[begin..end].iter().collect::<Vec<_>>();

if !reverse {
    did_id_rkeys.reverse();
}

Some comments might be justified though. That conditional looks super confusing lol

Upon thinking about this a bit more I think we should keep the reverse query parameter as is and don't touch the default order as this would be an unnecessary API break. Instead we could just introduce an enum and convert the query parameter to an enum value immediately after receiving. Other operations would be clearer then. We should still add some more context somewhere in the comments though. I had something like this in mind. What do you think?

// As backlinks are stored chronologically (oldest → newest) we need to reverse for newest-first queries
pub enum Order {
    NewestToOldest,  // default (reverse=false)
    OldestToNewest,  // reverse=true
}

impl From<bool> for Order {
    fn from(reverse: bool) -> Self {
        if reverse {
            Order::OldestToNewest
        } else {
            Order::NewestToOldest
        }
    }
}

// In server/mod.rs, where we receive the query parameter
fn get_links(
    accept: ExtractAccept,
    query: axum_extra::extract::Query<GetLinkItemsQuery>,
    store: impl LinkReader,
) -> Result<impl IntoResponse, http::StatusCode> {
    // Convert boolean to enum right at the boundary
    let order = Order::from(query.reverse);

    // Now pass the enum to storage layer
    let paged = store.get_links(
        &query.target,
        &query.collection,
        &query.path,
        order,  // ← Clean enum instead of mysterious boolean
        limit,
        until,
        &filter_dids,
    )?;
    // ...
}

// In rocks_store.rs get_links() - this replaces the confusing double-reverse!
let did_id_rkeys = match order {
    Order::OldestToNewest => {
        begin = until.map(|u| (u - 1) as usize).unwrap_or(0);
        end = std::cmp::min(begin + limit as usize, total as usize);
        next = if end < total as usize {
            Some(end as u64 + 1)
        } else {
            None
        };
        // No reversal - storage is already chronological
        linkers.0[begin..end].iter().collect::<Vec<_>>()
    }
    Order::NewestToOldest => {
        end = until.map(|u| std::cmp::min(u, total)).unwrap_or(total) as usize;
        begin = end.saturating_sub(limit as usize);
        next = if begin == 0 { None } else { Some(begin as u64) };
        // Reverse chronological storage to get newest-to-oldest
        linkers.0[begin..end].iter().rev().collect::<Vec<_>>()
    }
};

Thanks for the follow-up!

For the API, I agree with where you landed: keep it newest-first by default, optional reverse param being true makes it chronological. Small nit would be to skip the From<bool> impl since a bool itself doesn't inherently carry directional meaning; the endpoint gives it that meaning, so we can put the let order = if query.reverse { Order::OldestToNewest } ... directly in the endpoint.

i think you're right to just put the whole construction of the vec and cursor into branches on Order. part of me wants to try and encapsulate it a bit and switch the indexing to use linkers.0.iter().skip(begin).take(end) or whatever but the .rev() in there will still require a branch and/or make type things annoying and probably all of it less clear. i like your code.

comments about order are excellent thank you :)

Rebased the PR to match the current state of main and introduced the Order enum to make the sorting of items a bit clearer throughout the handlers that use the reverse parameter already (followed your suggestion regarding the From<bool> and put the definition of the order parameter directly in the endpoint itself before passing it as a parameter to the underlying handler).