From 8ca346499ad0dae23fa253d554641d8b9f07599a Mon Sep 17 00:00:00 2001 From: dafu-wu Date: Tue, 26 May 2026 07:36:04 +0000 Subject: [PATCH] [agent_loop] fix: use gpt-oss manual tool response format when gpt-oss tokenizer is detected When a model is based on a gpt-oss tokenizer but trained to emit hermes-style output, two incompatible things happen simultaneously: 1. format: hermes correctly parses <tool_call> blocks in the model output. 2. apply_chat_template raises TemplateError because the gpt-oss jinja template does not accept standard role:tool messages: "Message has tool role, but there was no previous assistant message with a tool call!" The gpt-oss path already handles this by calling build_gpt_oss_tool_response_text which manually encodes tool results as gpt-oss channel tokens, bypassing apply_chat_template entirely. Fix: detect the gpt-oss tokenizer at init time by checking for the <|channel|> special token. When detected, always use the manual gpt-oss tool response formatter regardless of the configured tool parser name, so models that output hermes-format tool calls but use a gpt-oss tokenizer do not crash. --- verl/experimental/agent_loop/tool_agent_loop.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/verl/experimental/agent_loop/tool_agent_loop.py b/verl/experimental/agent_loop/tool_agent_loop.py index 3dc94a7c529..1fad226f8a4 100644 --- a/verl/experimental/agent_loop/tool_agent_loop.py +++ b/verl/experimental/agent_loop/tool_agent_loop.py @@ -118,6 +118,15 @@ def __init__(self, *args, tools: Optional[ToolListWrap] = None, **kwargs): self.tool_parser = ToolParser.get_tool_parser(self.rollout_config.multi_turn.format, self.tokenizer) self.tool_parser_name = self.rollout_config.multi_turn.format + # Detect gpt-oss tokenizer by presence of <|channel|> special token. 
+ # When detected, tool responses must be manually formatted (bypassing apply_chat_template) + # regardless of the tool parser, since the gpt-oss jinja template does not support + # standard role:tool messages. + _channel_token_id = self.tokenizer.convert_tokens_to_ids("<|channel|>") + self._is_gpt_oss_tokenizer = ( + _channel_token_id is not None and _channel_token_id != self.tokenizer.unk_token_id + ) + self.prompt_length = self.rollout_config.prompt_length self.response_length = self.rollout_config.response_length @@ -350,7 +359,7 @@ async def _handle_processing_tools_state(self, agent_data: AgentData) -> AgentSt agent_data.messages.extend(add_messages) - if self.tool_parser_name == "gpt-oss": + if self.tool_parser_name == "gpt-oss" or self._is_gpt_oss_tokenizer: logger.info("manually format tool responses for gpt-oss") tool_response_text = build_gpt_oss_tool_response_text(add_messages, tool_call_names) response_ids = await self.loop.run_in_executor(