From 9cfe8b62654e3d31734d2923f2ae406d629a3a64 Mon Sep 17 00:00:00 2001
From: ImIvanGil <ivaang94@gmail.com>
Date: Tue, 12 May 2026 18:27:01 -0600
Subject: [PATCH] feat(ai): support thinking/reasoning models in
 OpenAI-compatible strategy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OpenAI-compatible "thinking" models — Kimi K2.5, K2.6, kimi-*-thinking
variants, GPT o1 family — emit a different stream shape and reject the
hardcoded temperature of 0.7. Without this patch, every request to them
fails in the panel, showing only the generic "No response received from
the API." message.

Two changes, both in OpenAiApiStrategy.qml:

1. **Dynamic temperature** in getBody():

   Thinking models require `temperature: 1` and reject anything else
   with HTTP 400 `invalid_request_error` ("only 1 is allowed for this
   model"). The current hardcoded `temperature: 0.7` causes every
   thinking-model request to fail before streaming even starts.

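   Fixed by regex-detecting thinking model IDs:
       /k2\.(5|6)|thinking|^o1(-|$)/

   The match is case-sensitive: it covers IDs containing "k2.5", "k2.6",
   or "thinking", plus IDs that are exactly "o1" or start with "o1-".
   Other models continue to use 0.7 unchanged.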

2. **reasoning_content support** in parseStreamChunk() and parseResponse():

   Thinking models emit `delta.reasoning_content` (and `message.reasoning_content`
   in non-stream) BEFORE the final `delta.content`. The existing parser
   only checks `delta.content`, so all reasoning chunks are ignored and
   the response buffer ends up empty.

   With this patch, streamed reasoning_content is treated as content and
   surfaced to the user — they see the model's chain-of-thought streaming
   in, then the final answer concatenated at the end. Same flow as
   ChatGPT/Claude thinking UIs. In non-stream responses,
   reasoning_content is used only as a fallback when content is empty.

This is purely additive — non-thinking models behave identically to
before. Tested with Kimi K2.6 (long thinking) and K2 (0905-preview,
non-thinking) — both work; non-thinking is unchanged.

Note: relies on PR #176 to register custom OpenAI-compatible providers
(Kimi/Moonshot, OpenRouter, etc.) via Config.ai.extraModels. Without
that PR, only providers with built-in fetch (Gemini/OpenAI/etc.) benefit
from the temperature fix here.
---
 .../ai/strategies/OpenAiApiStrategy.qml       | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/modules/services/ai/strategies/OpenAiApiStrategy.qml b/modules/services/ai/strategies/OpenAiApiStrategy.qml
index a8eb6e82..6043620d 100644
--- a/modules/services/ai/strategies/OpenAiApiStrategy.qml
+++ b/modules/services/ai/strategies/OpenAiApiStrategy.qml
@@ -41,10 +41,17 @@ ApiStrategy {
         return formatted;
     }
     function getBody(messages, model, tools) {
+        // Thinking / reasoning models reject temperature != 1 (Kimi K2.5/K2.6,
+        // kimi-*-thinking, GPT o1 family, etc.). Send temperature=1 for those;
+        // keep 0.7 for everything else.
+        let temp = 0.7;
+        if (model.model && /k2\.(5|6)|thinking|^o1(-|$)/.test(model.model)) {
+            temp = 1;
+        }
         let body = {
             model: model.model,
             messages: _formatMessages(messages),
-            temperature: 0.7
+            temperature: temp
         };
         if (tools && tools.length > 0) {
             body.tools = tools.map(t => ({
@@ -80,7 +87,12 @@ ApiStrategy {
                         }
                     };
                 }
-                return { content: msg.content };
+                // Thinking models include reasoning_content alongside (or instead of) content
+                let outContent = msg.content || "";
+                if (msg.reasoning_content && !outContent) {
+                    outContent = msg.reasoning_content;
+                }
+                return { content: outContent };
             }
             if (json.error)
                 return { content: "API Error: " + json.error.message };
@@ -108,6 +120,12 @@ ApiStrategy {
                 if (delta && delta.content)
                     return { content: delta.content, done: false, error: null };
 
+                // Thinking models (Kimi K2.5/K2.6, kimi-*-thinking, GPT o1, etc.)
+                // emit reasoning_content BEFORE the final content. Surface it so the
+                // response buffer fills and the user sees the model's chain-of-thought.
+                if (delta && delta.reasoning_content)
+                    return { content: delta.reasoning_content, done: false, error: null };
+
                 // Check for tool calls in stream
                 if (delta && delta.tool_calls) {
                     // Accumulate tool call data — handled by Ai.qml