55import concurrent .futures
66
77from .config import BenchConfig
8- from .sse_client import chat , get_metrics
8+ from .sse_client import chat , get_metrics , provision_user
99
1010
1111def _timed_chat (config : BenchConfig , message : str ) -> dict :
@@ -122,23 +122,73 @@ def run(config: BenchConfig) -> dict:
122122 user_ids = [str (base_user_num + i ) for i in range (concurrency )]
123123 except ValueError :
124124 user_ids = [f"{ config .user_id } -bench-{ i } " for i in range (concurrency )]
125- multi_cfgs = [config .clone_for_user (uid ) for uid in user_ids ]
126- multi_latencies , multi_errors , multi_wall_ms , multi_error_samples = _run_concurrency_scenario (
127- multi_cfgs ,
128- "Multi-user concurrent request" ,
129- )
130- multi_stats = _latency_stats (multi_latencies )
131- results ["multi_user" ] = {
132- "requests" : concurrency ,
133- "errors" : multi_errors ,
134- "success" : len (multi_latencies ),
135- "wall_time_ms" : round (multi_wall_ms , 1 ),
136- "p50_ms" : multi_stats .get ("p50_ms" ),
137- "p95_ms" : multi_stats .get ("p95_ms" ),
138- "p99_ms" : multi_stats .get ("p99_ms" ),
139- "error_samples" : multi_error_samples ,
125+ candidate_cfgs = [config .clone_for_user (uid ) for uid in user_ids ]
126+
127+ provisioned_cfgs : list [BenchConfig ] = []
128+ provisioned_users : list [str ] = []
129+ unavailable_users : list [str ] = []
130+ provisioning_error_samples : list [str ] = []
131+ for cfg in candidate_cfgs :
132+ provision_result = provision_user (cfg )
133+ if provision_result ["ok" ]:
134+ provisioned_cfgs .append (cfg )
135+ provisioned_users .append (cfg .user_id )
136+ continue
137+
138+ reason = provision_result .get ("reason" ) or "unknown"
139+ if reason == "unknown_user_id" :
140+ unavailable_users .append (cfg .user_id )
141+ elif len (provisioning_error_samples ) < 3 :
142+ sample = (
143+ f"user={ cfg .user_id } status={ provision_result .get ('status_code' )} "
144+ f"reason={ reason } "
145+ )
146+ if sample not in provisioning_error_samples :
147+ provisioning_error_samples .append (sample )
148+
149+ results ["multi_user_provisioning" ] = {
150+ "requested_users" : concurrency ,
151+ "provisioned_users" : len (provisioned_cfgs ),
152+ "provisioned_user_ids" : provisioned_users [:5 ],
153+ "unavailable_users" : len (unavailable_users ),
154+ "unavailable_user_ids" : unavailable_users [:5 ],
155+ "error_samples" : provisioning_error_samples ,
140156 }
141157
158+ if len (provisioned_cfgs ) >= 2 :
159+ multi_latencies , multi_errors , multi_wall_ms , multi_error_samples = _run_concurrency_scenario (
160+ provisioned_cfgs ,
161+ "Multi-user concurrent request" ,
162+ )
163+ multi_stats = _latency_stats (multi_latencies )
164+ results ["multi_user" ] = {
165+ "requests" : len (provisioned_cfgs ),
166+ "errors" : multi_errors ,
167+ "success" : len (multi_latencies ),
168+ "wall_time_ms" : round (multi_wall_ms , 1 ),
169+ "p50_ms" : multi_stats .get ("p50_ms" ),
170+ "p95_ms" : multi_stats .get ("p95_ms" ),
171+ "p99_ms" : multi_stats .get ("p99_ms" ),
172+ "error_samples" : multi_error_samples ,
173+ }
174+ identity_bootstrap_available = True
175+ else :
176+ results ["multi_user" ] = {
177+ "requests" : len (provisioned_cfgs ),
178+ "errors" : 0 ,
179+ "success" : 0 ,
180+ "wall_time_ms" : 0.0 ,
181+ "p50_ms" : None ,
182+ "p95_ms" : None ,
183+ "p99_ms" : None ,
184+ "error_samples" : [],
185+ }
186+ identity_bootstrap_available = False
187+ results ["note" ] = (
188+ "Multi-user scale measurement unavailable: benchmark users could not be "
189+ "provisioned through /api/v1/users/provision on this runtime."
190+ )
191+
142192 same_p95 = results ["same_session" ].get ("p95_ms" )
143193 multi_p95 = results ["multi_user" ].get ("p95_ms" )
144194 if same_p95 and multi_p95 and multi_p95 > 0 :
@@ -159,7 +209,22 @@ def run(config: BenchConfig) -> dict:
159209 results ["metrics_snapshot" ][parts [0 ]] = parts [1 ]
160210
161211 # Score calculation
162- multi_success_rate = len (multi_latencies ) / concurrency if concurrency > 0 else 0
212+ if not identity_bootstrap_available :
213+ results ["score" ] = 0.0
214+ results ["verified_score" ] = 0.0
215+ results ["projected_score" ] = 0.0
216+ results ["measured_coverage" ] = 0.0
217+ return {
218+ "dimension" : "scale_cost" ,
219+ "score" : results ["score" ],
220+ "verified_score" : results ["verified_score" ],
221+ "projected_score" : results ["projected_score" ],
222+ "measured_coverage" : results ["measured_coverage" ],
223+ "details" : results ,
224+ }
225+
226+ multi_requests = results ["multi_user" ]["requests" ]
227+ multi_success_rate = len (multi_latencies ) / multi_requests if multi_requests > 0 else 0
163228 multi_p95 = results ["multi_user" ].get ("p95_ms" ) or 60000
164229
165230 # Primary measured scale signal should use multi-user throughput, not same-session contention.
0 commit comments