From 9cfe8b62654e3d31734d2923f2ae406d629a3a64 Mon Sep 17 00:00:00 2001 From: ImIvanGil Date: Tue, 12 May 2026 18:27:01 -0600 Subject: [PATCH] feat(ai): support thinking/reasoning models in OpenAI-compatible strategy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenAI-compatible "thinking" models — Kimi K2.5, K2.6, kimi-*-thinking variants, GPT o1 family — emit a different stream shape and reject the default temperature. Without this patch, every request to them fails in the panel with only the generic "No response received from the API." message, which hides the real cause. Two changes, both in OpenAiApiStrategy.qml: 1. **Dynamic temperature** in getBody(): Thinking models require `temperature: 1` and reject anything else with HTTP 400 `invalid_request_error` ("only 1 is allowed for this model"). The current hardcoded `temperature: 0.7` causes every thinking-model request to fail before streaming even starts. Fixed by regex-detecting thinking model IDs with /k2\.(5|6)|thinking|^o1(-|$)/; other models continue to use 0.7 unchanged. 2. **reasoning_content support** in parseStreamChunk() and parseResponse(): Thinking models emit `delta.reasoning_content` (and `message.reasoning_content` in non-stream) BEFORE the final `delta.content`. The existing parser only checks `delta.content`, so all reasoning chunks are ignored and the response buffer ends up empty. With this patch, reasoning_content is treated as content and surfaced to the user — they see the model's chain-of-thought streaming in, then the final answer concatenated at the end. In the non-streaming parseResponse() path, `message.reasoning_content` is used only as a fallback when `message.content` is empty. This is the same flow as ChatGPT/Claude thinking UIs. This is purely additive — non-thinking models behave identically to before. Tested with Kimi K2.6 (long thinking) and K2 (0905-preview, non-thinking) — both work; non-thinking is unchanged. Note: this relies on PR #176 to register custom OpenAI-compatible providers (Kimi/Moonshot, OpenRouter, etc.) via Config.ai.extraModels. 
Without that PR, only providers with built-in fetch (Gemini/OpenAI/etc.) benefit from the temperature fix here. --- .../ai/strategies/OpenAiApiStrategy.qml | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/modules/services/ai/strategies/OpenAiApiStrategy.qml b/modules/services/ai/strategies/OpenAiApiStrategy.qml index a8eb6e82..6043620d 100644 --- a/modules/services/ai/strategies/OpenAiApiStrategy.qml +++ b/modules/services/ai/strategies/OpenAiApiStrategy.qml @@ -41,10 +41,17 @@ ApiStrategy { return formatted; } function getBody(messages, model, tools) { + // Thinking / reasoning models reject temperature != 1 (Kimi K2.5/K2.6, + // kimi-*-thinking, GPT o1 family, etc.). Send temperature=1 for those; + // keep 0.7 for everything else. + let temp = 0.7; + if (model.model && /k2\.(5|6)|thinking|^o1(-|$)/.test(model.model)) { + temp = 1; + } let body = { model: model.model, messages: _formatMessages(messages), - temperature: 0.7 + temperature: temp }; if (tools && tools.length > 0) { body.tools = tools.map(t => ({ @@ -80,7 +87,12 @@ ApiStrategy { } }; } - return { content: msg.content }; + // Thinking models include reasoning_content alongside (or instead of) content + let outContent = msg.content || ""; + if (msg.reasoning_content && !outContent) { + outContent = msg.reasoning_content; + } + return { content: outContent }; } if (json.error) return { content: "API Error: " + json.error.message }; @@ -108,6 +120,12 @@ ApiStrategy { if (delta && delta.content) return { content: delta.content, done: false, error: null }; + // Thinking models (Kimi K2.5/K2.6, kimi-*-thinking, GPT o1, etc.) + // emit reasoning_content BEFORE the final content. Surface it so the + // response buffer fills and the user sees the model's chain-of-thought. 
+ if (delta && delta.reasoning_content) + return { content: delta.reasoning_content, done: false, error: null }; + // Check for tool calls in stream if (delta && delta.tool_calls) { // Accumulate tool call data — handled by Ai.qml