@@ -320,12 +320,12 @@ async def _request() -> TextGenerationOutput:
320320
321321 try :
322322 return await self ._call_with_tee_retry ("completion" , _request )
323+ except RuntimeError :
324+ raise
323325 except httpx .HTTPStatusError as e :
324326 if e .response .status_code == 402 :
325327 raise RuntimeError (_402_HINT ) from e
326328 raise RuntimeError (f"TEE LLM completion failed: { e } " ) from e
327- except RuntimeError :
328- raise
329329 except Exception as e :
330330 raise RuntimeError (f"TEE LLM completion failed: { e } " ) from e
331331
@@ -426,14 +426,14 @@ async def _request() -> TextGenerationOutput:
426426
427427 try :
428428 return await self ._call_with_tee_retry ("chat" , _request )
429+ except RuntimeError :
430+ raise
429431 except httpx .HTTPStatusError as e :
430432 # Provide an actionable error message for the very common 402 case
431433 # (issue #188 — users see a cryptic RuntimeError instead of guidance).
432434 if e .response .status_code == 402 :
433435 raise RuntimeError (_402_HINT ) from e
434436 raise RuntimeError (f"TEE LLM chat failed: { e } " ) from e
435- except RuntimeError :
436- raise
437437 except Exception as e :
438438 raise RuntimeError (f"TEE LLM chat failed: { e } " ) from e
439439
@@ -507,9 +507,9 @@ async def _chat_stream(self, params: _ChatParams, messages: List[Dict]) -> Async
507507 async def _parse_sse_response (self , response ) -> AsyncGenerator [StreamChunk , None ]:
508508 """Parse an SSE response stream into StreamChunk objects."""
509509 status_code = getattr (response , "status_code" , None )
510+ if status_code is not None and status_code == 402 :
511+ raise RuntimeError (_402_HINT )
510512 if status_code is not None and status_code >= 400 :
511- if status_code == 402 :
512- raise RuntimeError (_402_HINT )
513513 body = await response .aread ()
514514 raise RuntimeError (f"TEE LLM streaming request failed with status { status_code } : { body .decode ('utf-8' , errors = 'replace' )} " )
515515
0 commit comments