From 8ca346499ad0dae23fa253d554641d8b9f07599a Mon Sep 17 00:00:00 2001
From: dafu-wu <wuchengyi2006@163.com>
Date: Tue, 26 May 2026 07:36:04 +0000
Subject: [PATCH] [agent_loop] fix: use gpt-oss manual tool response format
 when gpt-oss tokenizer is detected

When a model is based on a gpt-oss tokenizer but trained to emit hermes-style
<tool_call> output, two incompatible things happen simultaneously:

1. With format: hermes, the hermes tool parser correctly parses the
   <tool_call> blocks in the model output.
2. apply_chat_template raises TemplateError because the gpt-oss jinja template
   does not accept standard role:tool messages:
     "Message has tool role, but there was no previous assistant message
      with a tool call!"

The gpt-oss path already handles this by calling
build_gpt_oss_tool_response_text, which manually encodes tool results as
gpt-oss channel tokens, bypassing apply_chat_template entirely.

Fix: detect the gpt-oss tokenizer at init time by checking for the <|channel|>
special token. When detected, always use the manual gpt-oss tool response
formatter regardless of the configured tool parser name, so models that output
hermes-format tool calls but use a gpt-oss tokenizer do not crash.
---
 verl/experimental/agent_loop/tool_agent_loop.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/verl/experimental/agent_loop/tool_agent_loop.py b/verl/experimental/agent_loop/tool_agent_loop.py
index 3dc94a7c529..1fad226f8a4 100644
--- a/verl/experimental/agent_loop/tool_agent_loop.py
+++ b/verl/experimental/agent_loop/tool_agent_loop.py
@@ -118,6 +118,15 @@ def __init__(self, *args, tools: Optional[ToolListWrap] = None, **kwargs):
         self.tool_parser = ToolParser.get_tool_parser(self.rollout_config.multi_turn.format, self.tokenizer)
         self.tool_parser_name = self.rollout_config.multi_turn.format
 
+        # Detect gpt-oss tokenizer by presence of <|channel|> special token.
+        # When detected, tool responses must be manually formatted (bypassing apply_chat_template)
+        # regardless of the tool parser, since the gpt-oss jinja template does not support
+        # standard role:tool messages.
+        _channel_token_id = self.tokenizer.convert_tokens_to_ids("<|channel|>")
+        self._is_gpt_oss_tokenizer = (
+            _channel_token_id is not None and _channel_token_id != self.tokenizer.unk_token_id
+        )
+
         self.prompt_length = self.rollout_config.prompt_length
         self.response_length = self.rollout_config.response_length
 
@@ -350,7 +359,7 @@ async def _handle_processing_tools_state(self, agent_data: AgentData) -> AgentSt
 
         agent_data.messages.extend(add_messages)
 
-        if self.tool_parser_name == "gpt-oss":
+        if self.tool_parser_name == "gpt-oss" or self._is_gpt_oss_tokenizer:
             logger.info("manually format tool responses for gpt-oss")
             tool_response_text = build_gpt_oss_tool_response_text(add_messages, tool_call_names)
             response_ids = await self.loop.run_in_executor(