Pass the model's response ID through to ChatResponse.id and Msg.invocation_id

qbc2016 · qbc2016 · commit 92a3d55f90f0 · 2026-03-17T16:29:59.000+08:00
diff --git a/src/agentscope/agent/_react_agent.py b/src/agentscope/agent/_react_agent.py
@@ -584,6 +584,7 @@ async def _reasoning(
                 msg = Msg(name=self.name, content=[], role="assistant")
                 if self.model.stream:
                     async for content_chunk in res:
+                        msg.invocation_id = content_chunk.id
                         msg.content = content_chunk.content
 
                         # The speech generated from multimodal (audio) models
@@ -601,6 +602,7 @@ async def _reasoning(
                         await self.print(msg, False, speech=speech)
 
                 else:
+                    msg.invocation_id = res.id
                     msg.content = list(res.content)
 
                 if self.tts_model:
@@ -757,6 +759,7 @@ async def _summarizing(self) -> Msg:
             res_msg = Msg(self.name, [], "assistant")
             if isinstance(res, AsyncGenerator):
                 async for chunk in res:
+                    res_msg.invocation_id = chunk.id
                     res_msg.content = chunk.content
 
                     # The speech generated from multimodal (audio) models
@@ -774,6 +777,7 @@ async def _summarizing(self) -> Msg:
                     await self.print(res_msg, False, speech=speech)
 
             else:
+                res_msg.invocation_id = res.id
                 res_msg.content = res.content
 
             if self.tts_model:
diff --git a/src/agentscope/model/_anthropic_model.py b/src/agentscope/model/_anthropic_model.py
@@ -348,13 +348,16 @@ async def _parse_anthropic_completion_response(
                 time=(datetime.now() - start_datetime).total_seconds(),
             )
 
-        parsed_response = ChatResponse(
-            content=content_blocks,
-            usage=usage,
-            metadata=metadata,
-        )
+        resp_kwargs: dict[str, Any] = {
+            "content": content_blocks,
+            "usage": usage,
+            "metadata": metadata,
+        }
+        response_id = getattr(response, "id", None)
+        if response_id:
+            resp_kwargs["id"] = response_id
 
-        return parsed_response
+        return ChatResponse(**resp_kwargs)
 
     async def _parse_anthropic_stream_completion_response(
         self,
@@ -386,6 +389,7 @@ async def _parse_anthropic_stream_completion_response(
         """
 
         usage = None
+        response_id: str | None = None
         text_buffer = ""
         thinking_buffer = ""
         thinking_signature = ""
@@ -404,6 +408,8 @@ async def _parse_anthropic_stream_completion_response(
 
             if event.type == "message_start":
                 message = event.message
+                if response_id is None:
+                    response_id = getattr(message, "id", None)
                 if message.usage:
                     usage = ChatUsage(
                         input_tokens=message.usage.input_tokens,
@@ -504,11 +510,14 @@ async def _parse_anthropic_stream_completion_response(
                         metadata = repaired_input
 
                 if contents:
-                    res = ChatResponse(
-                        content=contents,
-                        usage=usage,
-                        metadata=metadata,
-                    )
+                    _kwargs: dict[str, Any] = {
+                        "content": contents,
+                        "usage": usage,
+                        "metadata": metadata,
+                    }
+                    if response_id:
+                        _kwargs["id"] = response_id
+                    res = ChatResponse(**_kwargs)
                     yield res
                     last_content = copy.deepcopy(contents)
 
@@ -525,11 +534,14 @@ async def _parse_anthropic_stream_completion_response(
                     if structured_model:
                         metadata = input_obj
 
-            yield ChatResponse(
-                content=last_content,
-                usage=usage,
-                metadata=metadata,
-            )
+            _final_kwargs: dict[str, Any] = {
+                "content": last_content,
+                "usage": usage,
+                "metadata": metadata,
+            }
+            if response_id:
+                _final_kwargs["id"] = response_id
+            yield ChatResponse(**_final_kwargs)
 
     def _format_tools_json_schemas(
         self,
diff --git a/src/agentscope/model/_dashscope_model.py b/src/agentscope/model/_dashscope_model.py
@@ -340,13 +340,17 @@ async def _parse_dashscope_stream_response(
         metadata = None
         last_content = None
         usage = None
+        response_id: str | None = None
 
         async for chunk in giter(response):
             if chunk.status_code != HTTPStatus.OK:
                 raise RuntimeError(
                     f"Failed to get response from _ API: {chunk}",
                 )
 
+            if response_id is None:
+                response_id = getattr(chunk, "request_id", None)
+
             message = chunk.output.choices[0].message
 
             # Update reasoning content
@@ -451,11 +455,14 @@ async def _parse_dashscope_stream_response(
                 )
 
             if content_blocks:
-                parsed_chunk = ChatResponse(
-                    content=content_blocks,
-                    usage=usage,
-                    metadata=metadata,
-                )
+                _kwargs: dict[str, Any] = {
+                    "content": content_blocks,
+                    "usage": usage,
+                    "metadata": metadata,
+                }
+                if response_id:
+                    _kwargs["id"] = response_id
+                parsed_chunk = ChatResponse(**_kwargs)
                 yield parsed_chunk
                 last_content = copy.deepcopy(content_blocks)
 
@@ -473,11 +480,14 @@ async def _parse_dashscope_stream_response(
                     if structured_model:
                         metadata = input_obj
 
-            yield ChatResponse(
-                content=last_content,
-                usage=usage,
-                metadata=metadata,
-            )
+            _final_kwargs: dict[str, Any] = {
+                "content": last_content,
+                "usage": usage,
+                "metadata": metadata,
+            }
+            if response_id:
+                _final_kwargs["id"] = response_id
+            yield ChatResponse(**_final_kwargs)
 
     async def _parse_dashscope_generation_response(
         self,
@@ -574,13 +584,16 @@ async def _parse_dashscope_generation_response(
                 metadata=response.usage,
             )
 
-        parsed_response = ChatResponse(
-            content=content_blocks,
-            usage=usage,
-            metadata=metadata,
-        )
+        resp_kwargs: dict[str, Any] = {
+            "content": content_blocks,
+            "usage": usage,
+            "metadata": metadata,
+        }
+        response_id = getattr(response, "request_id", None)
+        if response_id:
+            resp_kwargs["id"] = response_id
 
-        return parsed_response
+        return ChatResponse(**resp_kwargs)
 
     def _format_tools_json_schemas(
         self,
diff --git a/src/agentscope/model/_gemini_model.py b/src/agentscope/model/_gemini_model.py
@@ -333,6 +333,7 @@ def _extract_usage(
             )
         return None
 
+    # pylint: disable=too-many-branches
     async def _parse_gemini_stream_generation_response(
         self,
         start_datetime: datetime,
@@ -366,6 +367,7 @@ async def _parse_gemini_stream_generation_response(
         thinking = ""
         tool_calls: list[ToolUseBlock] = []
         metadata: dict | None = None
+        response_id: str | None = None
         async for chunk in response:
             if (
                 chunk.candidates
@@ -434,11 +436,17 @@ async def _parse_gemini_stream_generation_response(
                     ),
                 )
 
-            yield ChatResponse(
-                content=content_blocks + tool_calls,
-                usage=usage,
-                metadata=metadata,
-            )
+            if response_id is None:
+                response_id = getattr(chunk, "response_id", None)
+
+            _kwargs: dict[str, Any] = {
+                "content": content_blocks + tool_calls,
+                "usage": usage,
+                "metadata": metadata,
+            }
+            if response_id:
+                _kwargs["id"] = response_id
+            yield ChatResponse(**_kwargs)
 
     def _parse_gemini_generation_response(
         self,
@@ -527,11 +535,16 @@ def _parse_gemini_generation_response(
 
         usage = self._extract_usage(response.usage_metadata, start_datetime)
 
-        return ChatResponse(
-            content=content_blocks + tool_calls,
-            usage=usage,
-            metadata=metadata,
-        )
+        resp_kwargs: dict[str, Any] = {
+            "content": content_blocks + tool_calls,
+            "usage": usage,
+            "metadata": metadata,
+        }
+        response_id = getattr(response, "response_id", None)
+        if response_id:
+            resp_kwargs["id"] = response_id
+
+        return ChatResponse(**resp_kwargs)
 
     def _format_tools_json_schemas(
         self,
diff --git a/src/agentscope/model/_ollama_model.py b/src/agentscope/model/_ollama_model.py
@@ -204,6 +204,7 @@ async def _parse_ollama_stream_completion_response(
         acc_thinking_content = ""
         tool_calls = OrderedDict()  # Store tool calls
         metadata: dict | None = None
+        response_id: str | None = None
 
         async for chunk in response:
             # Handle text content
@@ -262,13 +263,19 @@ async def _parse_ollama_stream_completion_response(
                 except Exception as e:
                     print(f"Error parsing tool call input: {e}")
 
+            if response_id is None:
+                response_id = getattr(chunk, "id", None)
+
             # Generate response when there's new content or at final chunk
             if chunk.done or contents:
-                res = ChatResponse(
-                    content=contents,
-                    usage=usage,
-                    metadata=metadata,
-                )
+                _kwargs: dict[str, Any] = {
+                    "content": contents,
+                    "usage": usage,
+                    "metadata": metadata,
+                }
+                if response_id:
+                    _kwargs["id"] = response_id
+                res = ChatResponse(**_kwargs)
                 yield res
 
     async def _parse_ollama_completion_response(
@@ -339,13 +346,16 @@ async def _parse_ollama_completion_response(
                 time=(datetime.now() - start_datetime).total_seconds(),
             )
 
-        parsed_response = ChatResponse(
-            content=content_blocks,
-            usage=usage,
-            metadata=metadata,
-        )
+        resp_kwargs: dict[str, Any] = {
+            "content": content_blocks,
+            "usage": usage,
+            "metadata": metadata,
+        }
+        response_id = getattr(response, "id", None)
+        if response_id:
+            resp_kwargs["id"] = response_id
 
-        return parsed_response
+        return ChatResponse(**resp_kwargs)
 
     def _format_tools_json_schemas(
         self,
diff --git a/src/agentscope/model/_openai_model.py b/src/agentscope/model/_openai_model.py