TCP/TLS connection pooling for Eio

better eio errors

+14 -70
-5
lib/config.ml
··· 65 65 (Printf.sprintf "connect_retry_delay must be positive, got %.2f" 66 66 connect_retry_delay); 67 67 68 - Log.debug (fun m -> 69 - m 70 - "Creating config: max_connections=%d, max_idle=%.1fs, \ 71 - max_lifetime=%.1fs" 72 - max_connections_per_endpoint max_idle_time max_connection_lifetime); 73 68 { 74 69 max_connections_per_endpoint; 75 70 max_idle_time;
+14 -64
lib/conpool.ml
··· 111 111 (** {1 DNS Resolution} *) 112 112 113 113 let resolve_endpoint (pool : ('clock, 'net) internal) endpoint = 114 - Log.debug (fun m -> m "Resolving %a..." Endpoint.pp endpoint); 114 + Log.debug (fun m -> m "Resolving %a" Endpoint.pp endpoint); 115 115 try 116 116 let addrs = 117 117 Eio.Net.getaddrinfo_stream pool.net (Endpoint.host endpoint) 118 118 ~service:(string_of_int (Endpoint.port endpoint)) 119 119 in 120 - Log.debug (fun m -> m "Got address list for %a" Endpoint.pp endpoint); 121 120 match addrs with 122 121 | addr :: _ -> 123 122 Log.debug (fun m -> 124 123 m "Resolved %a to %a" Endpoint.pp endpoint Eio.Net.Sockaddr.pp addr); 125 124 addr 126 125 | [] -> 127 - Log.err (fun m -> 128 - m "Failed to resolve hostname: %s" (Endpoint.host endpoint)); 126 + (* Raise exception with error code - context will be added when caught *) 129 127 raise (err (Dns_resolution_failed { hostname = Endpoint.host endpoint })) 130 128 with Eio.Io _ as ex -> 131 129 let bt = Printexc.get_raw_backtrace () in ··· 136 134 let rec create_connection_with_retry (pool : ('clock, 'net) internal) endpoint 137 135 attempt last_error = 138 136 let retry_count = Config.connect_retry_count pool.config in 139 - if attempt > retry_count then begin 140 - Log.err (fun m -> 141 - m "Failed to connect to %a after %d attempts" Endpoint.pp endpoint 142 - retry_count); 143 - raise (err (Connection_failed { endpoint; attempts = retry_count; last_error })) 144 - end; 137 + if attempt > retry_count then 138 + (* Raise exception with error code - context will be added when caught *) 139 + raise (err (Connection_failed { endpoint; attempts = retry_count; last_error })); 145 140 146 141 Log.debug (fun m -> 147 142 m "Connecting to %a (attempt %d/%d)" Endpoint.pp endpoint attempt ··· 149 144 150 145 try 151 146 let addr = resolve_endpoint pool endpoint in 152 - Log.debug (fun m -> m "Resolved %a to address" Endpoint.pp endpoint); 153 147 154 148 (* Connect with optional timeout *) 155 149 let socket = ··· 323 317 (Config.on_connection_closed pool.config) 324 318 325 319 let get_or_create_endpoint_pool (pool : ('clock, 'net) internal) endpoint = 326 - Log.debug (fun m -> 327 - m "Getting or creating endpoint pool for %a" Endpoint.pp endpoint); 328 - 329 320 (* First try with read lock *) 330 321 match 331 322 Eio.Mutex.use_ro pool.endpoints_mutex (fun () -> 332 323 Hashtbl.find_opt pool.endpoints endpoint) 333 324 with 334 325 | Some ep_pool -> 335 - Log.debug (fun m -> 336 - m "Found existing endpoint pool for %a" Endpoint.pp endpoint); 337 326 ep_pool 338 327 | None -> 339 - Log.debug (fun m -> 340 - m "No existing pool, need to create for %a" Endpoint.pp endpoint); 341 328 (* Need to create - use write lock *) 342 329 Eio.Mutex.use_rw ~protect:true pool.endpoints_mutex (fun () -> 343 330 (* Check again in case another fiber created it *) 344 331 match Hashtbl.find_opt pool.endpoints endpoint with 345 332 | Some ep_pool -> 346 - Log.debug (fun m -> 347 - m "Another fiber created pool for %a" Endpoint.pp endpoint); 348 333 ep_pool 349 334 | None -> 350 335 (* Create new endpoint pool *) ··· 352 337 let mutex = Eio.Mutex.create () in 353 338 354 339 Log.info (fun m -> 355 - m "Creating new endpoint pool for %a (max_connections=%d)" 340 + m "Creating endpoint pool for %a (max_connections=%d)" 356 341 Endpoint.pp endpoint 357 342 (Config.max_connections_per_endpoint pool.config)); 358 343 359 - Log.debug (fun m -> 360 - m "About to create Eio.Pool for %a" Endpoint.pp endpoint); 361 - 362 344 let eio_pool = 363 345 Eio.Pool.create 364 346 (Config.max_connections_per_endpoint pool.config) 365 347 ~validate:(fun conn -> 366 - Log.debug (fun m -> 367 - m "Validate called for connection to %a" Endpoint.pp 368 - endpoint); 369 - (* Called before reusing from pool *) 370 348 let healthy = is_healthy pool ~check_readable:false conn in 371 - 372 349 if healthy then ( 373 - Log.debug (fun m -> 374 - m "Reusing connection to %a from pool" Endpoint.pp 375 - endpoint); 376 - 377 350 (* Update stats for reuse *) 378 351 Eio.Mutex.use_rw ~protect:true mutex (fun () -> 379 352 stats.total_reused <- stats.total_reused + 1); ··· 388 361 | Some check -> ( 389 362 try check (Connection.flow conn) with _ -> false) 390 363 | None -> true) 391 - else begin 392 - Log.debug (fun m -> 393 - m 394 - "Connection to %a failed validation, creating new \ 395 - one" 396 - Endpoint.pp endpoint); 397 - false 398 - end) 364 + else 365 + false) 399 366 ~dispose:(fun conn -> 400 367 (* Called when removing from pool *) 401 368 Eio.Cancel.protect (fun () -> ··· 405 372 Eio.Mutex.use_rw ~protect:true mutex (fun () -> 406 373 stats.total_closed <- stats.total_closed + 1))) 407 374 (fun () -> 408 - Log.debug (fun m -> 409 - m "Factory function called for %a" Endpoint.pp endpoint); 410 375 try 411 376 let conn = create_connection pool endpoint in 412 377 413 - Log.debug (fun m -> 414 - m "Connection created successfully for %a" Endpoint.pp 415 - endpoint); 416 - 417 378 (* Update stats *) 418 379 Eio.Mutex.use_rw ~protect:true mutex (fun () -> 419 380 stats.total_created <- stats.total_created + 1); ··· 424 385 (Config.on_connection_created pool.config); 425 386 426 387 conn 427 - with e -> 428 - Log.err (fun m -> 429 - m "Factory function failed for %a: %s" Endpoint.pp 430 - endpoint (Printexc.to_string e)); 431 - (* Update error stats *) 388 + with Eio.Io _ as ex -> 389 + (* Eio.Io exceptions already have full context from create_connection. 390 + Just update error stats and let the exception propagate. *) 432 391 Eio.Mutex.use_rw ~protect:true mutex (fun () -> 433 392 stats.errors <- stats.errors + 1); 434 - raise e) 393 + raise ex) 435 394 in 436 - 437 - Log.debug (fun m -> 438 - m "Eio.Pool created successfully for %a" Endpoint.pp endpoint); 439 395 440 396 let ep_pool = { pool = eio_pool; stats; mutex } in 441 - 442 397 Hashtbl.add pool.endpoints endpoint ep_pool; 443 - Log.debug (fun m -> 444 - m "Endpoint pool added to hashtable for %a" Endpoint.pp 445 - endpoint); 446 398 ep_pool) 447 399 448 400 (** {1 Public API - Pool Creation} *) ··· 534 486 535 487 `Stop_daemon 536 488 with e -> 537 - (* Error - close connection so it won't be reused *) 538 - Log.warn (fun m -> 539 - m "Error with connection to %a: %s" Endpoint.pp endpoint 540 - (Printexc.to_string e)); 489 + (* Error during connection usage - close so it won't be reused. 490 + The exception already has context from where it was raised. *) 541 491 close_internal pool conn; 542 492 543 493 (* Update error stats *)
-1
lib/endpoint.ml
··· 21 21 (* Validate hostname is not empty *) 22 22 if String.trim host = "" then invalid_arg "Hostname cannot be empty"; 23 23 24 - Log.debug (fun m -> m "Creating endpoint: %s:%d" host port); 25 24 { host; port } 26 25 27 26 let host t = t.host