@@ -21,8 +21,56 @@ def _timed_chat(config: BenchConfig, message: str) -> dict:
2121 }
2222
2323
24+ def _run_concurrency_scenario (configs : list [BenchConfig ], message_prefix : str ) -> tuple [list [float ], int , float ]:
25+ """Run one concurrency scenario and return (latencies_ms, errors, wall_time_ms)."""
26+ latencies : list [float ] = []
27+ errors = 0
28+ start_all = time .monotonic ()
29+
30+ with concurrent .futures .ThreadPoolExecutor (max_workers = len (configs )) as pool :
31+ futures = []
32+ for i , cfg in enumerate (configs ):
33+ futures .append (
34+ pool .submit (
35+ _timed_chat ,
36+ cfg ,
37+ f"{ message_prefix } { i } : what is { i } +{ i } ?" ,
38+ )
39+ )
40+ for f in concurrent .futures .as_completed (futures ):
41+ r = f .result ()
42+ if r ["error" ]:
43+ errors += 1
44+ else :
45+ latencies .append (r ["latency_ms" ])
46+
47+ wall_time_ms = (time .monotonic () - start_all ) * 1000
48+ return latencies , errors , wall_time_ms
49+
50+
51+ def _latency_stats (latencies : list [float ]) -> dict :
52+ if not latencies :
53+ return {}
54+ ordered = sorted (latencies )
55+ return {
56+ "p50_ms" : round (statistics .median (ordered ), 1 ),
57+ "p95_ms" : round (
58+ ordered [int (len (ordered ) * 0.95 )] if len (ordered ) >= 5 else max (ordered ),
59+ 1 ,
60+ ),
61+ "p99_ms" : round (
62+ ordered [int (len (ordered ) * 0.99 )] if len (ordered ) >= 10 else max (ordered ),
63+ 1 ,
64+ ),
65+ }
66+
67+
2468def run (config : BenchConfig ) -> dict :
25- """Test scale under concurrent load."""
69+ """Test scale in three modes:
70+ 1) same-session contention (single user)
71+ 2) multi-user parallel (one request per user)
72+ 3) transport/pool metric snapshot
73+ """
2674 results = {}
2775 concurrency = config .scale_concurrency
2876
@@ -40,47 +88,50 @@ def run(config: BenchConfig) -> dict:
4088 results ["baseline_p50_ms" ] = None
4189 results ["baseline_error" ] = "All baseline requests failed"
4290
43- # Phase 2: Concurrent load
44- latencies = []
45- errors = 0
46- start_all = time .monotonic ()
47-
48- with concurrent .futures .ThreadPoolExecutor (max_workers = concurrency ) as pool :
49- futures = []
50- for i in range (concurrency ):
51- futures .append (
52- pool .submit (
53- _timed_chat ,
54- config ,
55- f"Concurrent test request { i } : what is { i } +{ i } ?" ,
56- )
57- )
58- for f in concurrent .futures .as_completed (futures ):
59- r = f .result ()
60- if r ["error" ]:
61- errors += 1
62- else :
63- latencies .append (r ["latency_ms" ])
91+ # Phase 2a: Same-user same-session contention (diagnostic only; not primary scale metric)
92+ same_session_cfgs = [config for _ in range (concurrency )]
93+ same_latencies , same_errors , same_wall_ms = _run_concurrency_scenario (
94+ same_session_cfgs ,
95+ "Same-session contention request" ,
96+ )
97+ same_stats = _latency_stats (same_latencies )
98+ results ["same_session" ] = {
99+ "requests" : concurrency ,
100+ "errors" : same_errors ,
101+ "success" : len (same_latencies ),
102+ "wall_time_ms" : round (same_wall_ms , 1 ),
103+ "p50_ms" : same_stats .get ("p50_ms" ),
104+ "p95_ms" : same_stats .get ("p95_ms" ),
105+ "p99_ms" : same_stats .get ("p99_ms" ),
106+ }
64107
65- wall_time_ms = (time .monotonic () - start_all ) * 1000
108+ # Phase 2b: Multi-user parallel (primary scale metric)
109+ try :
110+ base_user_num = int (config .user_id )
111+ user_ids = [str (base_user_num + i ) for i in range (concurrency )]
112+ except ValueError :
113+ user_ids = [f"{ config .user_id } -bench-{ i } " for i in range (concurrency )]
114+ multi_cfgs = [config .clone_for_user (uid ) for uid in user_ids ]
115+ multi_latencies , multi_errors , multi_wall_ms = _run_concurrency_scenario (
116+ multi_cfgs ,
117+ "Multi-user concurrent request" ,
118+ )
119+ multi_stats = _latency_stats (multi_latencies )
120+ results ["multi_user" ] = {
121+ "requests" : concurrency ,
122+ "errors" : multi_errors ,
123+ "success" : len (multi_latencies ),
124+ "wall_time_ms" : round (multi_wall_ms , 1 ),
125+ "p50_ms" : multi_stats .get ("p50_ms" ),
126+ "p95_ms" : multi_stats .get ("p95_ms" ),
127+ "p99_ms" : multi_stats .get ("p99_ms" ),
128+ }
66129
67- results ["concurrent_requests" ] = concurrency
68- results ["concurrent_errors" ] = errors
69- results ["concurrent_success" ] = len (latencies )
70- results ["wall_time_ms" ] = round (wall_time_ms , 1 )
71-
72- if latencies :
73- latencies .sort ()
74- results ["concurrent_p50_ms" ] = round (statistics .median (latencies ), 1 )
75- results ["concurrent_p95_ms" ] = (
76- round (latencies [int (len (latencies ) * 0.95 )], 1 )
77- if len (latencies ) >= 5
78- else round (max (latencies ), 1 )
79- )
80- results ["concurrent_p99_ms" ] = (
81- round (latencies [int (len (latencies ) * 0.99 )], 1 )
82- if len (latencies ) >= 10
83- else round (max (latencies ), 1 )
130+ same_p95 = results ["same_session" ].get ("p95_ms" )
131+ multi_p95 = results ["multi_user" ].get ("p95_ms" )
132+ if same_p95 and multi_p95 and multi_p95 > 0 :
133+ results ["contention_ratio_same_session_over_multi_user" ] = round (
134+ same_p95 / multi_p95 , 2
84135 )
85136
86137 # Phase 3: Parse metrics for pool/transport stats
@@ -96,23 +147,22 @@ def run(config: BenchConfig) -> dict:
96147 results ["metrics_snapshot" ][parts [0 ]] = parts [1 ]
97148
98149 # Score calculation
99- success_rate = len (latencies ) / concurrency if concurrency > 0 else 0
100- p95 = results .get ("concurrent_p95_ms" , 60000 )
150+ multi_success_rate = len (multi_latencies ) / concurrency if concurrency > 0 else 0
151+ multi_p95 = results [ "multi_user" ] .get ("p95_ms" ) or 60000
101152
102- # inverse_rss (projected — can't measure from outside), inverse_p95, max_users, horizontal, inverse_cost
103- p95_score = max (0 , 100 - (p95 / 1000 )) # 0ms = 100, 100s = 0
104- success_score = success_rate * 100
153+ # Primary measured scale signal should use multi-user throughput, not same-session contention.
154+ multi_p95_score = max (0 , 100 - (multi_p95 / 1000 )) # 0ms = 100, 100s = 0
155+ verified_score = multi_p95_score * multi_success_rate
156+
157+ # Projection still includes unmeasured dimensions.
105158 projected_score = (
106- 0.70 * 0.25 # inverse_rss: projected (2.6MB binary, ~43MB test RSS)
107- + p95_score / 100 * 0.20
108- + 0.70 * 0.25 # max_users: projected (~1000/instance)
109- + 0.80 * 0.20 # horizontal: projected (tenant lock + Postgres)
159+ 0.70 * 0.25 # inverse_rss: projected
160+ + ( multi_p95_score / 100 ) * 0.20
161+ + 0.70 * 0.25 # max_users: projected
162+ + 0.80 * 0.20 # horizontal: projected
110163 + 0.60 * 0.10 # inverse_cost: projected
111164 ) * 100
112-
113- # Adjust based on actual success rate
114- projected_score = projected_score * success_rate
115- verified_score = p95_score * success_rate
165+ projected_score = projected_score * multi_success_rate
116166
117167 results ["score" ] = round (min (100 , projected_score ), 1 )
118168 results ["verified_score" ] = round (min (100 , verified_score ), 1 )