TCP/TLS connection pooling for Eio

testcase

+340
+3
test/dune
··· 1 + (executable 2 + (name stress_test) 3 + (libraries conpool eio eio_main logs logs.fmt fmt))
+337
test/stress_test.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Stress test framework for conpool 7 + 8 + Spawns variable number of echo servers on random ports, then exercises 9 + the connection pool with multiple parallel client fibers. 10 + *) 11 + 12 + let src = Logs.Src.create "stress_test" ~doc:"Connection pool stress test" 13 + module Log = (val Logs.src_log src : Logs.LOG) 14 + 15 + (** Configuration for the stress test *) 16 + type config = { 17 + num_servers : int; (** Number of echo servers to spawn *) 18 + num_clients : int; (** Number of client connections per server *) 19 + messages_per_client : int; (** Number of messages each client sends *) 20 + max_parallel_clients : int; (** Maximum concurrent client fibers *) 21 + message_size : int; (** Size of each message in bytes *) 22 + pool_size : int; (** Max connections per endpoint *) 23 + } 24 + 25 + let default_config = { 26 + num_servers = 3; 27 + num_clients = 10; 28 + messages_per_client = 5; 29 + max_parallel_clients = 20; 30 + message_size = 64; 31 + pool_size = 5; 32 + } 33 + 34 + (** Statistics collected during test *) 35 + type stats = { 36 + mutable total_connections : int; 37 + mutable total_messages : int; 38 + mutable total_bytes : int; 39 + mutable errors : int; 40 + mutable min_latency : float; 41 + mutable max_latency : float; 42 + mutable total_latency : float; 43 + } 44 + 45 + let create_stats () = { 46 + total_connections = 0; 47 + total_messages = 0; 48 + total_bytes = 0; 49 + errors = 0; 50 + min_latency = Float.infinity; 51 + max_latency = 0.0; 52 + total_latency = 0.0; 53 + } 54 + 55 + let update_latency stats latency = 56 + stats.min_latency <- min stats.min_latency latency; 57 + stats.max_latency <- max stats.max_latency latency; 58 + stats.total_latency <- stats.total_latency +. latency 59 + 60 + (** Generate a random message of given size *) 61 + let generate_message size = 62 + let chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" in 63 + let len = String.length chars in 64 + String.init size (fun _ -> chars.[Random.int len]) 65 + 66 + (** Echo server handler - echoes back everything it receives *) 67 + let handle_echo_client flow addr = 68 + Log.debug (fun m -> m "Echo server: accepted connection from %a" 69 + Eio.Net.Sockaddr.pp addr); 70 + let buf = Cstruct.create 4096 in 71 + let rec loop () = 72 + match Eio.Flow.single_read flow buf with 73 + | n -> 74 + let data = Cstruct.sub buf 0 n in 75 + Eio.Flow.write flow [data]; 76 + loop () 77 + | exception End_of_file -> 78 + Log.debug (fun m -> m "Echo server: client disconnected from %a" 79 + Eio.Net.Sockaddr.pp addr) 80 + in 81 + loop () 82 + 83 + (** Start an echo server on a random port, returns the port number *) 84 + let start_echo_server ~sw net = 85 + (* Listen on port 0 to get a random available port *) 86 + let addr = `Tcp (Eio.Net.Ipaddr.V4.loopback, 0) in 87 + let listening_socket = Eio.Net.listen net ~sw ~backlog:128 ~reuse_addr:true addr in 88 + 89 + (* Get the actual port assigned *) 90 + let actual_addr = Eio.Net.listening_addr listening_socket in 91 + let port = match actual_addr with 92 + | `Tcp (_, port) -> port 93 + | _ -> failwith "Expected TCP address" 94 + in 95 + 96 + Log.info (fun m -> m "Echo server started on port %d" port); 97 + 98 + (* Start accepting connections in a daemon fiber. 99 + The daemon runs until cancelled when the switch finishes. *) 100 + Eio.Fiber.fork_daemon ~sw (fun () -> 101 + try 102 + while true do 103 + Eio.Net.accept_fork ~sw listening_socket 104 + ~on_error:(fun ex -> 105 + Log.warn (fun m -> m "Echo server error: %a" Fmt.exn ex)) 106 + handle_echo_client 107 + done; 108 + `Stop_daemon 109 + with Eio.Cancel.Cancelled _ -> 110 + `Stop_daemon 111 + ); 112 + 113 + port 114 + 115 + (** Client test: connect via pool, send message, verify echo *) 116 + let run_client_test ~clock pool endpoint message test_stats = 117 + let msg_len = String.length message in 118 + let start_time = Eio.Time.now clock in 119 + 120 + try 121 + Conpool.with_connection pool endpoint (fun flow -> 122 + (* Send message *) 123 + Eio.Flow.copy_string message flow; 124 + Eio.Flow.copy_string "\n" flow; (* delimiter *) 125 + 126 + (* Read echo response *) 127 + let response = Eio.Buf_read.of_flow flow ~max_size:(msg_len + 1) in 128 + let echoed = Eio.Buf_read.line response in 129 + 130 + let end_time = Eio.Time.now clock in 131 + let latency = end_time -. start_time in 132 + 133 + if String.equal echoed message then begin 134 + test_stats.total_messages <- test_stats.total_messages + 1; 135 + test_stats.total_bytes <- test_stats.total_bytes + msg_len; 136 + update_latency test_stats latency; 137 + Log.debug (fun m -> m "Client: echoed %d bytes in %.3fms" 138 + msg_len (latency *. 1000.0)) 139 + end else begin 140 + test_stats.errors <- test_stats.errors + 1; 141 + Log.err (fun m -> m "Client: echo mismatch! sent=%S got=%S" message echoed) 142 + end 143 + ); 144 + test_stats.total_connections <- test_stats.total_connections + 1 145 + with ex -> 146 + test_stats.errors <- test_stats.errors + 1; 147 + Log.err (fun m -> m "Client error: %a" Fmt.exn ex) 148 + 149 + (** Run a single client that sends multiple messages *) 150 + let run_client ~clock pool endpoints config test_stats client_id = 151 + Log.debug (fun m -> m "Starting client %d" client_id); 152 + 153 + for msg_num = 1 to config.messages_per_client do 154 + (* Pick a random endpoint *) 155 + let endpoint_idx = Random.int (Array.length endpoints) in 156 + let endpoint = endpoints.(endpoint_idx) in 157 + 158 + (* Generate unique message *) 159 + let message = Printf.sprintf "client%d-msg%d-%s" 160 + client_id msg_num (generate_message config.message_size) in 161 + 162 + run_client_test ~clock pool endpoint message test_stats 163 + done; 164 + 165 + Log.debug (fun m -> m "Client %d completed" client_id) 166 + 167 + (** Main stress test runner *) 168 + let run_stress_test ~env config = 169 + let net = Eio.Stdenv.net env in 170 + let clock = Eio.Stdenv.clock env in 171 + 172 + Log.info (fun m -> m "=== Stress Test Configuration ==="); 173 + Log.info (fun m -> m "Servers: %d" config.num_servers); 174 + Log.info (fun m -> m "Clients per server: %d" config.num_clients); 175 + Log.info (fun m -> m "Messages per client: %d" config.messages_per_client); 176 + Log.info (fun m -> m "Max parallel clients: %d" config.max_parallel_clients); 177 + Log.info (fun m -> m "Message size: %d bytes" config.message_size); 178 + Log.info (fun m -> m "Pool size per endpoint: %d" config.pool_size); 179 + 180 + (* Use a sub-switch for servers so we can cancel them when done *) 181 + let test_passed = ref false in 182 + let expected_messages = ref 0 in 183 + 184 + Eio.Switch.run @@ fun sw -> 185 + (* Start echo servers *) 186 + Log.info (fun m -> m "Starting %d echo servers..." config.num_servers); 187 + let ports = Array.init config.num_servers (fun _ -> 188 + start_echo_server ~sw net 189 + ) in 190 + 191 + (* Small delay to ensure servers are ready *) 192 + Eio.Time.sleep clock 0.1; 193 + 194 + (* Create endpoints for all servers *) 195 + let endpoints = Array.map (fun port -> 196 + Conpool.Endpoint.make ~host:"127.0.0.1" ~port 197 + ) ports in 198 + 199 + Log.info (fun m -> m "Servers ready on ports: %s" 200 + (String.concat ", " (Array.to_list (Array.map string_of_int ports)))); 201 + 202 + (* Create connection pool *) 203 + let pool_config = Conpool.Config.make 204 + ~max_connections_per_endpoint:config.pool_size 205 + ~max_idle_time:30.0 206 + ~max_connection_lifetime:120.0 207 + ~connect_timeout:5.0 208 + ~connect_retry_count:3 209 + () 210 + in 211 + 212 + let pool = Conpool.create ~sw ~net ~clock ~config:pool_config () in 213 + Log.info (fun m -> m "Connection pool created"); 214 + 215 + (* Initialize test statistics *) 216 + let test_stats = create_stats () in 217 + 218 + (* Calculate total clients *) 219 + let total_clients = config.num_servers * config.num_clients in 220 + expected_messages := total_clients * config.messages_per_client; 221 + Log.info (fun m -> m "Running %d total clients..." total_clients); 222 + 223 + let start_time = Eio.Time.now clock in 224 + 225 + (* Run clients in parallel using Fiber.List *) 226 + let client_ids = List.init total_clients (fun i -> i) in 227 + Eio.Fiber.List.iter ~max_fibers:config.max_parallel_clients 228 + (fun client_id -> 229 + run_client ~clock pool endpoints config test_stats client_id) 230 + client_ids; 231 + 232 + let end_time = Eio.Time.now clock in 233 + let total_time = end_time -. start_time in 234 + 235 + (* Print results *) 236 + Log.info (fun m -> m ""); 237 + Log.info (fun m -> m "=== Test Results ==="); 238 + Log.info (fun m -> m "Total time: %.3fs" total_time); 239 + Log.info (fun m -> m "Total connections: %d" test_stats.total_connections); 240 + Log.info (fun m -> m "Total messages: %d" test_stats.total_messages); 241 + Log.info (fun m -> m "Total bytes transferred: %d" test_stats.total_bytes); 242 + Log.info (fun m -> m "Errors: %d" test_stats.errors); 243 + 244 + if test_stats.total_messages > 0 then begin 245 + let avg_latency = test_stats.total_latency /. 246 + float_of_int test_stats.total_messages in 247 + Log.info (fun m -> m "Latency (min/avg/max): %.3fms / %.3fms / %.3fms" 248 + (test_stats.min_latency *. 1000.0) 249 + (avg_latency *. 1000.0) 250 + (test_stats.max_latency *. 1000.0)); 251 + Log.info (fun m -> m "Throughput: %.1f messages/sec" 252 + (float_of_int test_stats.total_messages /. total_time)); 253 + Log.info (fun m -> m "Bandwidth: %.1f KB/sec" 254 + (float_of_int test_stats.total_bytes /. total_time /. 1024.0)) 255 + end; 256 + 257 + (* Print pool statistics for each endpoint *) 258 + Log.info (fun m -> m ""); 259 + Log.info (fun m -> m "=== Pool Statistics ==="); 260 + Array.iteri (fun i endpoint -> 261 + let stats = Conpool.stats pool endpoint in 262 + Log.info (fun m -> m "Endpoint %d (port %d):" i ports.(i)); 263 + Log.info (fun m -> m " Active: %d, Idle: %d" 264 + (Conpool.Stats.active stats) (Conpool.Stats.idle stats)); 265 + Log.info (fun m -> m " Created: %d, Reused: %d, Closed: %d, Errors: %d" 266 + (Conpool.Stats.total_created stats) 267 + (Conpool.Stats.total_reused stats) 268 + (Conpool.Stats.total_closed stats) 269 + (Conpool.Stats.errors stats)) 270 + ) endpoints; 271 + 272 + (* Verify success *) 273 + test_passed := test_stats.errors = 0 && 274 + test_stats.total_messages = !expected_messages; 275 + 276 + if !test_passed then 277 + Log.info (fun m -> m "TEST PASSED: All %d messages echoed successfully!" 278 + !expected_messages) 279 + else 280 + Log.err (fun m -> m "TEST FAILED: Expected %d messages, got %d with %d errors" 281 + !expected_messages test_stats.total_messages test_stats.errors); 282 + 283 + (* Cancel the switch to stop servers and exit cleanly *) 284 + Eio.Switch.fail sw Exit 285 + 286 + (** Parse command line arguments *) 287 + let parse_config () = 288 + let num_servers = ref default_config.num_servers in 289 + let num_clients = ref default_config.num_clients in 290 + let messages_per_client = ref default_config.messages_per_client in 291 + let max_parallel = ref default_config.max_parallel_clients in 292 + let message_size = ref default_config.message_size in 293 + let pool_size = ref default_config.pool_size in 294 + let verbose = ref false in 295 + 296 + let specs = [ 297 + ("-s", Arg.Set_int num_servers, 298 + Printf.sprintf "Number of echo servers (default: %d)" default_config.num_servers); 299 + ("-c", Arg.Set_int num_clients, 300 + Printf.sprintf "Clients per server (default: %d)" default_config.num_clients); 301 + ("-m", Arg.Set_int messages_per_client, 302 + Printf.sprintf "Messages per client (default: %d)" default_config.messages_per_client); 303 + ("-p", Arg.Set_int max_parallel, 304 + Printf.sprintf "Max parallel clients (default: %d)" default_config.max_parallel_clients); 305 + ("-b", Arg.Set_int message_size, 306 + Printf.sprintf "Message size in bytes (default: %d)" default_config.message_size); 307 + ("-P", Arg.Set_int pool_size, 308 + Printf.sprintf "Pool size per endpoint (default: %d)" default_config.pool_size); 309 + ("-v", Arg.Set verbose, "Enable verbose/debug logging"); 310 + ] in 311 + 312 + let usage = "Usage: stress_test [options]" in 313 + Arg.parse specs (fun _ -> ()) usage; 314 + 315 + (* Configure logging *) 316 + Logs.set_reporter (Logs_fmt.reporter ()); 317 + if !verbose then 318 + Logs.set_level (Some Logs.Debug) 319 + else 320 + Logs.set_level (Some Logs.Info); 321 + 322 + { 323 + num_servers = !num_servers; 324 + num_clients = !num_clients; 325 + messages_per_client = !messages_per_client; 326 + max_parallel_clients = !max_parallel; 327 + message_size = !message_size; 328 + pool_size = !pool_size; 329 + } 330 + 331 + let () = 332 + Random.self_init (); 333 + let config = parse_config () in 334 + Eio_main.run @@ fun env -> 335 + (* Catch Exit which is used to signal clean shutdown *) 336 + try run_stress_test ~env config 337 + with Exit -> ()