fix(relay): address createSession code review findings

An easy-to-host PDS on the ATProtocol, MacOS. Grandma-approved.

- Move clear_failures to after tx.commit() so the counter is not reset
on a mid-flight 500 (JWT sign or DB commit failure)
- Log tracing::error\! on all four Mutex::lock PoisonError paths instead
of silently discarding the error
- Log tracing::error\! when stored password_hash fails PHC parse (DB
corruption indicator) rather than treating it as a wrong password
- Return 500 for clock-before-epoch instead of issuing expired JWTs via
unwrap_or_default()
- Use DB-stored h.handle in the handle-path response instead of echoing
the raw user input (fixes non-canonical casing)
- Remove stale scaffolding doc-comment and "goroutine" terminology from
is_rate_limited
- Add three tests: deactivated_account_returns_401,
wrong_password_and_unknown_identifier_return_identical_errors,
successful_login_clears_rate_limit_counter

malpercio.dev 5 hours ago 4387bbd1 8749c0d7

ci.yaml

success 4s

+134 -34

1 changed file

expand all

crates

relay

src

routes

create_session.rs

+134 -34

crates/relay/src/routes/create_session.rs

··· 96 96 let mut attempts = state 97 97 .failed_login_attempts 98 98 .lock() 99 - .map_err(|_| ApiError::new(ErrorCode::InternalError, "internal error"))?; 99 + .map_err(|_| { 100 + tracing::error!("failed_login_attempts mutex is poisoned"); 101 + ApiError::new(ErrorCode::InternalError, "internal error") 102 + })?; 100 103 if is_rate_limited(&mut attempts, &payload.identifier) { 101 104 return Err(ApiError::new( 102 105 ErrorCode::RateLimited, ··· 121 124 let mut attempts = state 122 125 .failed_login_attempts 123 126 .lock() 124 - .map_err(|_| ApiError::new(ErrorCode::InternalError, "internal error"))?; 127 + .map_err(|_| { 128 + tracing::error!("failed_login_attempts mutex is poisoned"); 129 + ApiError::new(ErrorCode::InternalError, "internal error") 130 + })?; 125 131 record_failure(&mut attempts, &payload.identifier); 126 132 return Err(ApiError::new( 127 133 ErrorCode::AuthenticationRequired, ··· 134 140 let mut attempts = state 135 141 .failed_login_attempts 136 142 .lock() 137 - .map_err(|_| ApiError::new(ErrorCode::InternalError, "internal error"))?; 143 + .map_err(|_| { 144 + tracing::error!("failed_login_attempts mutex is poisoned"); 145 + ApiError::new(ErrorCode::InternalError, "internal error") 146 + })?; 138 147 record_failure(&mut attempts, &payload.identifier); 139 148 return Err(ApiError::new( 140 149 ErrorCode::AuthenticationRequired, ··· 143 152 } 144 153 }; 145 154 146 - // --- Clear failure history on successful authentication --- 147 - { 148 - let mut attempts = state 149 - .failed_login_attempts 150 - .lock() 151 - .map_err(|_| ApiError::new(ErrorCode::InternalError, "internal error"))?; 152 - clear_failures(&mut attempts, &payload.identifier); 153 - } 154 - 155 155 // --- Issue legacy HS256 JWTs --- 156 156 let now = SystemTime::now() 157 157 .duration_since(UNIX_EPOCH) 158 - .unwrap_or_default() 158 + .map_err(|e| { 159 + tracing::error!(error = %e, "system clock is before Unix epoch"); 160 + ApiError::new(ErrorCode::InternalError, "failed to issue token") 161 + })? 159 162 .as_secs(); 160 163 161 164 // Prefer server_did as audience (what verify_hs256_access_token validates against ··· 212 215 ApiError::new(ErrorCode::InternalError, "failed to create session") 213 216 })?; 214 217 218 + // Clear failure history only after the session is fully committed. 219 + // Doing this earlier would reset the counter even if JWT issuance or the 220 + // DB transaction subsequently fails. 221 + { 222 + let mut attempts = state 223 + .failed_login_attempts 224 + .lock() 225 + .map_err(|_| { 226 + tracing::error!("failed_login_attempts mutex is poisoned"); 227 + ApiError::new(ErrorCode::InternalError, "internal error") 228 + })?; 229 + clear_failures(&mut attempts, &payload.identifier); 230 + } 231 + 215 232 Ok(( 216 233 StatusCode::OK, 217 234 Json(CreateSessionResponse { ··· 256 273 handle, 257 274 })) 258 275 } else { 259 - let row: Option<(String, String, Option<String>)> = sqlx::query_as( 260 - "SELECT a.did, a.email, a.password_hash \ 276 + let row: Option<(String, String, Option<String>, String)> = sqlx::query_as( 277 + "SELECT a.did, a.email, a.password_hash, h.handle \ 261 278 FROM handles h \ 262 279 JOIN accounts a ON a.did = h.did \ 263 280 WHERE h.handle = ? AND a.deactivated_at IS NULL \ ··· 271 288 ApiError::new(ErrorCode::InternalError, "failed to resolve identifier") 272 289 })?; 273 290 274 - Ok(row.map(|(did, email, password_hash)| AccountRow { 291 + Ok(row.map(|(did, email, password_hash, handle)| AccountRow { 275 292 did, 276 293 email, 277 294 password_hash, 278 - handle: Some(identifier.to_string()), 295 + handle: Some(handle), 279 296 })) 280 297 } 281 298 } ··· 283 300 /// Verify `password` against a stored argon2id PHC-format hash string. 284 301 fn verify_password(stored_hash: &str, password: &str) -> bool { 285 302 let Ok(hash) = PasswordHash::new(stored_hash) else { 303 + tracing::error!("stored password_hash is not a valid PHC string; possible DB corruption"); 286 304 return false; 287 305 }; 288 306 Argon2::default() ··· 346 364 /// attempts within the last `RATE_LIMIT_WINDOW_SECS` seconds (sliding window). 347 365 /// 348 366 /// Prunes expired entries from the front of the deque during the check, keeping 349 - /// memory bounded without a separate cleanup goroutine. 350 - /// 351 - /// # Your turn 352 - /// 353 - /// Implement this function. The approach: 354 - /// 1. Look up the `VecDeque` for `identifier`; return `false` if absent (no prior failures). 355 - /// 2. Drop entries from the **front** that are older than `RATE_LIMIT_WINDOW_SECS`. 356 - /// 3. Return `true` if the remaining count ≥ `RATE_LIMIT_MAX_FAILURES`. 357 - /// 358 - /// Trade-off to consider: a sliding window is accurate but allocates one `Instant` per 359 - /// failure. A fixed window (keyed by `now / window_secs`) uses O(1) memory but can allow 360 - /// 2× the limit at a window boundary. Either is valid for v0.1 — pick the approach that 361 - /// makes the rate-limit test below pass. 367 + /// memory bounded without a separate background task. 362 368 fn is_rate_limited( 363 369 attempts: &mut HashMap<String, VecDeque<Instant>>, 364 370 identifier: &str, ··· 682 688 assert_eq!(response.status(), StatusCode::UNAUTHORIZED); 683 689 } 684 690 685 - // ── Rate limiting (requires is_rate_limited implementation) ─────────────── 686 - // 687 - // This test will fail until you implement `is_rate_limited` above. 688 - // After 5 failures the 6th attempt should receive 429 Too Many Requests. 691 + // ── Rate limiting ───────────────────────────────────────────────────────── 689 692 690 693 #[tokio::test] 691 694 async fn rate_limit_triggers_after_five_failures() { ··· 711 714 .await 712 715 .unwrap(); 713 716 assert_eq!(response.status(), StatusCode::TOO_MANY_REQUESTS); 717 + } 718 + 719 + #[tokio::test] 720 + async fn deactivated_account_returns_401() { 721 + let state = test_state().await; 722 + insert_account_with_password( 723 + &state.db, 724 + "did:plc:deactivated", 725 + "deact.test.example.com", 726 + "deact@example.com", 727 + "password", 728 + ) 729 + .await; 730 + 731 + sqlx::query( 732 + "UPDATE accounts SET deactivated_at = datetime('now') WHERE did = 'did:plc:deactivated'", 733 + ) 734 + .execute(&state.db) 735 + .await 736 + .unwrap(); 737 + 738 + let response = app(state) 739 + .oneshot(post_create_session("did:plc:deactivated", "password")) 740 + .await 741 + .unwrap(); 742 + 743 + assert_eq!(response.status(), StatusCode::UNAUTHORIZED); 744 + } 745 + 746 + #[tokio::test] 747 + async fn wrong_password_and_unknown_identifier_return_identical_errors() { 748 + let state = test_state().await; 749 + insert_account_with_password( 750 + &state.db, 751 + "did:plc:enumtest", 752 + "enumtest.test.example.com", 753 + "enumtest@example.com", 754 + "correctpassword", 755 + ) 756 + .await; 757 + 758 + let wrong_pw = app(state.clone()) 759 + .oneshot(post_create_session("did:plc:enumtest", "wrongpassword")) 760 + .await 761 + .unwrap(); 762 + let unknown = app(state) 763 + .oneshot(post_create_session("did:plc:nobody", "anything")) 764 + .await 765 + .unwrap(); 766 + 767 + assert_eq!(wrong_pw.status(), unknown.status()); 768 + let wrong_pw_json = body_json(wrong_pw).await; 769 + let unknown_json = body_json(unknown).await; 770 + assert_eq!(wrong_pw_json["error"]["code"], unknown_json["error"]["code"]); 771 + assert_eq!( 772 + wrong_pw_json["error"]["message"], 773 + unknown_json["error"]["message"] 774 + ); 775 + } 776 + 777 + #[tokio::test] 778 + async fn successful_login_clears_rate_limit_counter() { 779 + let state = test_state().await; 780 + insert_account_with_password( 781 + &state.db, 782 + "did:plc:cleartest", 783 + "cleartest.test.example.com", 784 + "cleartest@example.com", 785 + "correctpassword", 786 + ) 787 + .await; 788 + 789 + // N-1 failed attempts (one below the threshold) 790 + for _ in 0..(RATE_LIMIT_MAX_FAILURES - 1) { 791 + app(state.clone()) 792 + .oneshot(post_create_session("did:plc:cleartest", "wrongpassword")) 793 + .await 794 + .unwrap(); 795 + } 796 + 797 + // Successful login clears the counter 798 + let ok = app(state.clone()) 799 + .oneshot(post_create_session("did:plc:cleartest", "correctpassword")) 800 + .await 801 + .unwrap(); 802 + assert_eq!(ok.status(), StatusCode::OK); 803 + 804 + // One more failure should be 401, not 429 — counter was reset 805 + let after = app(state) 806 + .oneshot(post_create_session("did:plc:cleartest", "wrongpassword")) 807 + .await 808 + .unwrap(); 809 + assert_eq!( 810 + after.status(), 811 + StatusCode::UNAUTHORIZED, 812 + "counter must have been cleared by the successful login" 813 + ); 714 814 } 715 815 }