From 1f9be44dca1a4d611d3283628d18e5da1e9f42ee Mon Sep 17 00:00:00 2001
From: Mackinnon Buck <mackinnon.buck@gmail.com>
Date: Tue, 31 Mar 2026 10:34:35 -0700
Subject: [PATCH 1/5] Add token limit fields to ProviderConfig across all SDKs

Add maxOutputTokens, maxPromptTokens, maxContextWindowTokens, and
modelLimitsId to ProviderConfig in Node.js, Python, .NET, and Go SDKs.
These optional fields allow BYOK (bring-your-own-key) users to configure
token limits for custom providers, matching the runtime's ProviderConfig
(PR #5311).

Also update the Python wire format conversion to map the new snake_case
fields to camelCase for the JSON-RPC wire protocol.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 dotnet/src/Types.cs       | 29 +++++++++++++++++++++++++++++
 go/types.go               | 13 +++++++++++++
 nodejs/src/types.ts       | 25 +++++++++++++++++++++++++
 python/copilot/client.py  | 10 ++++++++++
 python/copilot/session.py | 13 +++++++++++++
 5 files changed, 90 insertions(+)
diff --git a/dotnet/src/Types.cs b/dotnet/src/Types.cs
index d6530f9c7..b6ab440c9 100644
--- a/dotnet/src/Types.cs
+++ b/dotnet/src/Types.cs
@@ -1115,6 +1115,35 @@ public class ProviderConfig
     /// </summary>
     [JsonPropertyName("azure")]
     public AzureOptions? Azure { get; set; }
+
+    /// <summary>
+    /// Overrides the maximum number of output tokens the model can generate.
+    /// When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+    /// </summary>
+    [JsonPropertyName("maxOutputTokens")]
+    public int? MaxOutputTokens { get; set; }
+
+    /// <summary>
+    /// Overrides the maximum number of prompt/input tokens.
+    /// When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+    /// </summary>
+    [JsonPropertyName("maxPromptTokens")]
+    public int? MaxPromptTokens { get; set; }
+
+    /// <summary>
+    /// Overrides the maximum context window size in tokens.
+    /// When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+    /// </summary>
+    [JsonPropertyName("maxContextWindowTokens")]
+    public int? MaxContextWindowTokens { get; set; }
+
+    /// <summary>
+    /// Specifies the model ID used to look up default token limits from the capability catalog.
+    /// When unset, the session's configured model ID (see <see cref="SessionConfig.Model"/>) is used.
+    /// This is useful for fine-tuned models that share the same limits as a base model.
+    /// </summary>
+    [JsonPropertyName("modelLimitsId")]
+    public string? ModelLimitsId { get; set; }
 }
 
 /// <summary>
diff --git a/go/types.go b/go/types.go
index f888c9b6e..cbe0d53dc 100644
--- a/go/types.go
+++ b/go/types.go
@@ -601,6 +601,19 @@ type ProviderConfig struct {
 	BearerToken string `json:"bearerToken,omitempty"`
 	// Azure contains Azure-specific options
 	Azure *AzureProviderOptions `json:"azure,omitempty"`
+	// MaxOutputTokens overrides the maximum number of output tokens the model can generate.
+	// When set, takes precedence over the default limit from the model's capability catalog entry.
+	MaxOutputTokens int `json:"maxOutputTokens,omitempty"`
+	// MaxPromptTokens overrides the maximum number of prompt/input tokens.
+	// When set, takes precedence over the default limit from the model's capability catalog entry.
+	MaxPromptTokens int `json:"maxPromptTokens,omitempty"`
+	// MaxContextWindowTokens overrides the maximum context window size in tokens.
+	// When set, takes precedence over the default limit from the model's capability catalog entry.
+	MaxContextWindowTokens int `json:"maxContextWindowTokens,omitempty"`
+	// ModelLimitsId specifies the model ID used to look up default token limits from the capability catalog.
+	// When unset, the session's configured model ID is used.
+	// Useful for fine-tuned models that share the same limits as a base model.
+	ModelLimitsId string `json:"modelLimitsId,omitempty"`
 }
 
 // AzureProviderOptions contains Azure-specific provider configuration
diff --git a/nodejs/src/types.ts b/nodejs/src/types.ts
index b4b9e563c..4de9dfb83 100644
--- a/nodejs/src/types.ts
+++ b/nodejs/src/types.ts
@@ -1260,6 +1260,31 @@ export interface ProviderConfig {
          */
         apiVersion?: string;
     };
+
+    /**
+     * Overrides the maximum number of output tokens the model can generate.
+     * When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+     */
+    maxOutputTokens?: number;
+
+    /**
+     * Overrides the maximum number of prompt/input tokens.
+     * When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+     */
+    maxPromptTokens?: number;
+
+    /**
+     * Overrides the maximum context window size in tokens.
+     * When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+     */
+    maxContextWindowTokens?: number;
+
+    /**
+     * Specifies the model ID used to look up default token limits from the capability catalog.
+     * When unset, the session's configured model ID is used.
+     * This is useful for fine-tuned models that share the same limits as a base model.
+     */
+    modelLimitsId?: string;
 }
 
 /**
diff --git a/python/copilot/client.py b/python/copilot/client.py
index ab8074756..64cb567c8 100644
--- a/python/copilot/client.py
+++ b/python/copilot/client.py
@@ -1962,6 +1962,16 @@ def _convert_provider_to_wire_format(
                 wire_azure["apiVersion"] = azure["api_version"]
             if wire_azure:
                 wire_provider["azure"] = wire_azure
+        if "max_output_tokens" in provider:
+            wire_provider["maxOutputTokens"] = provider["max_output_tokens"]
+        if "max_prompt_tokens" in provider:
+            wire_provider["maxPromptTokens"] = provider["max_prompt_tokens"]
+        if "max_context_window_tokens" in provider:
+            wire_provider["maxContextWindowTokens"] = provider[
+                "max_context_window_tokens"
+            ]
+        if "model_limits_id" in provider:
+            wire_provider["modelLimitsId"] = provider["model_limits_id"]
         return wire_provider
 
     def _convert_custom_agent_to_wire_format(
diff --git a/python/copilot/session.py b/python/copilot/session.py
index 019436f7a..1e1b056af 100644
--- a/python/copilot/session.py
+++ b/python/copilot/session.py
@@ -507,6 +507,19 @@ class ProviderConfig(TypedDict, total=False):
     # Takes precedence over api_key when both are set.
     bearer_token: str
     azure: AzureProviderOptions  # Azure-specific options
+    # Overrides the maximum number of output tokens the model can generate.
+    # Takes precedence over the default limit from the model's capability catalog entry.
+    max_output_tokens: int
+    # Overrides the maximum number of prompt/input tokens.
+    # Takes precedence over the default limit from the model's capability catalog entry.
+    max_prompt_tokens: int
+    # Overrides the maximum context window size in tokens.
+    # Takes precedence over the default limit from the model's capability catalog entry.
+    max_context_window_tokens: int
+    # Model ID used to look up default token limits from the capability catalog.
+    # When unset, the session's configured model ID is used.
+    # Useful for fine-tuned models that share the same limits as a base model.
+    model_limits_id: str
 
 
 class SessionConfig(TypedDict, total=False):

From 3c92cedbb44a58872187933d5109627ce9ae3b52 Mon Sep 17 00:00:00 2001
From: Mackinnon Buck <mackinnon.buck@gmail.com>
Date: Tue, 31 Mar 2026 10:55:47 -0700
Subject: [PATCH 2/5] Address PR review: rename ModelLimitsId to ModelLimitsID
 in Go SDK

Fix the Go field name to follow Go's initialism convention (ID, not Id),
consistent with existing fields like APIKey, BaseURL, and APIVersion.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 go/types.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/go/types.go b/go/types.go
index cbe0d53dc..7246d29db 100644
--- a/go/types.go
+++ b/go/types.go
@@ -610,10 +610,10 @@ type ProviderConfig struct {
 	// MaxContextWindowTokens overrides the maximum context window size in tokens.
 	// When set, takes precedence over the default limit from the model's capability catalog entry.
 	MaxContextWindowTokens int `json:"maxContextWindowTokens,omitempty"`
-	// ModelLimitsId specifies the model ID used to look up default token limits from the capability catalog.
+	// ModelLimitsID specifies the model ID used to look up default token limits from the capability catalog.
 	// When unset, the session's configured model ID is used.
 	// Useful for fine-tuned models that share the same limits as a base model.
-	ModelLimitsId string `json:"modelLimitsId,omitempty"`
+	ModelLimitsID string `json:"modelLimitsId,omitempty"`
 }
 
 // AzureProviderOptions contains Azure-specific provider configuration

From 7bafc5fa8247582aaab896d9ac12bb880531e0a9 Mon Sep 17 00:00:00 2001
From: Mackinnon Buck <mackinnon.buck@gmail.com>
Date: Tue, 31 Mar 2026 11:18:13 -0700
Subject: [PATCH 3/5] Fix CI: ruff format issue in Python client.py

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 python/copilot/client.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/copilot/client.py b/python/copilot/client.py
index 64cb567c8..9d7a8eb11 100644
--- a/python/copilot/client.py
+++ b/python/copilot/client.py
@@ -1967,9 +1967,7 @@ def _convert_provider_to_wire_format(
         if "max_prompt_tokens" in provider:
             wire_provider["maxPromptTokens"] = provider["max_prompt_tokens"]
         if "max_context_window_tokens" in provider:
-            wire_provider["maxContextWindowTokens"] = provider[
-                "max_context_window_tokens"
-            ]
+            wire_provider["maxContextWindowTokens"] = provider["max_context_window_tokens"]
         if "model_limits_id" in provider:
             wire_provider["modelLimitsId"] = provider["model_limits_id"]
         return wire_provider

From d0d283c903db76cf03d6c7853871dc31f3d8dfeb Mon Sep 17 00:00:00 2001
From: Mackinnon Buck <mackinnon.buck@gmail.com>
Date: Tue, 31 Mar 2026 13:11:11 -0700
Subject: [PATCH 4/5] Remove maxContextWindowTokens and refine token limit docs

Remove maxContextWindowTokens from all SDKs - it is an internal runtime
fallback that should not be exposed as public SDK API.

Refine doc comments for maxOutputTokens and maxPromptTokens to explain
what happens when each limit is hit:
- maxOutputTokens: sent as max_tokens per LLM request; model stops
  generating and returns a truncated response when hit.
- maxPromptTokens: used by the runtime to trigger conversation
  compaction before sending a request when the prompt exceeds this limit.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 dotnet/src/Types.cs       | 17 ++++++-----------
 go/types.go               | 13 ++++++-------
 nodejs/src/types.ts       | 16 ++++++----------
 python/copilot/client.py  |  2 --
 python/copilot/session.py | 14 +++++++-------
 5 files changed, 25 insertions(+), 37 deletions(-)

diff --git a/dotnet/src/Types.cs b/dotnet/src/Types.cs
index b6ab440c9..b2c0096dc 100644
--- a/dotnet/src/Types.cs
+++ b/dotnet/src/Types.cs
@@ -1117,26 +1117,21 @@ public class ProviderConfig
     public AzureOptions? Azure { get; set; }
 
     /// <summary>
-    /// Overrides the maximum number of output tokens the model can generate.
-    /// When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+    /// Maximum number of tokens the model can generate in a single response.
+    /// Sent as <c>max_tokens</c> per LLM API request. When hit, the model stops
+    /// generating and returns a truncated response.
     /// </summary>
     [JsonPropertyName("maxOutputTokens")]
     public int? MaxOutputTokens { get; set; }
 
     /// <summary>
-    /// Overrides the maximum number of prompt/input tokens.
-    /// When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+    /// Maximum number of tokens allowed in the prompt for a single LLM API request.
+    /// Used by the runtime to trigger conversation compaction before sending a request
+    /// when the prompt (system message, history, tool definitions, user message) exceeds this limit.
     /// </summary>
     [JsonPropertyName("maxPromptTokens")]
     public int? MaxPromptTokens { get; set; }
 
-    /// <summary>
-    /// Overrides the maximum context window size in tokens.
-    /// When set, takes precedence over the default limit resolved from the model's capability catalog entry.
-    /// </summary>
-    [JsonPropertyName("maxContextWindowTokens")]
-    public int? MaxContextWindowTokens { get; set; }
-
     /// <summary>
     /// Specifies the model ID used to look up default token limits from the capability catalog.
     /// When unset, the session's configured model ID (see <see cref="SessionConfig.Model"/>) is used.
diff --git a/go/types.go b/go/types.go
index 7246d29db..0687ca5d9 100644
--- a/go/types.go
+++ b/go/types.go
@@ -601,15 +601,14 @@ type ProviderConfig struct {
 	BearerToken string `json:"bearerToken,omitempty"`
 	// Azure contains Azure-specific options
 	Azure *AzureProviderOptions `json:"azure,omitempty"`
-	// MaxOutputTokens overrides the maximum number of output tokens the model can generate.
-	// When set, takes precedence over the default limit from the model's capability catalog entry.
+	// MaxOutputTokens is the maximum number of tokens the model can generate in a single response.
+	// Sent as max_tokens per LLM API request. When hit, the model stops generating and returns
+	// a truncated response.
 	MaxOutputTokens int `json:"maxOutputTokens,omitempty"`
-	// MaxPromptTokens overrides the maximum number of prompt/input tokens.
-	// When set, takes precedence over the default limit from the model's capability catalog entry.
+	// MaxPromptTokens is the maximum number of tokens allowed in the prompt for a single LLM API
+	// request. Used by the runtime to trigger conversation compaction before sending a request
+	// when the prompt (system message, history, tool definitions, user message) exceeds this limit.
 	MaxPromptTokens int `json:"maxPromptTokens,omitempty"`
-	// MaxContextWindowTokens overrides the maximum context window size in tokens.
-	// When set, takes precedence over the default limit from the model's capability catalog entry.
-	MaxContextWindowTokens int `json:"maxContextWindowTokens,omitempty"`
 	// ModelLimitsID specifies the model ID used to look up default token limits from the capability catalog.
 	// When unset, the session's configured model ID is used.
 	// Useful for fine-tuned models that share the same limits as a base model.
diff --git a/nodejs/src/types.ts b/nodejs/src/types.ts
index 4de9dfb83..03d3d05a8 100644
--- a/nodejs/src/types.ts
+++ b/nodejs/src/types.ts
@@ -1262,23 +1262,19 @@ export interface ProviderConfig {
     };
 
     /**
-     * Overrides the maximum number of output tokens the model can generate.
-     * When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+     * Maximum number of tokens the model can generate in a single response.
+     * Sent as {@link https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens max_tokens} per LLM API request.
+     * When hit, the model stops generating and returns a truncated response.
      */
     maxOutputTokens?: number;
 
     /**
-     * Overrides the maximum number of prompt/input tokens.
-     * When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+     * Maximum number of tokens allowed in the prompt for a single LLM API request.
+     * Used by the runtime to trigger conversation compaction before sending a request
+     * when the prompt (system message, history, tool definitions, user message) exceeds this limit.
      */
     maxPromptTokens?: number;
 
-    /**
-     * Overrides the maximum context window size in tokens.
-     * When set, takes precedence over the default limit resolved from the model's capability catalog entry.
-     */
-    maxContextWindowTokens?: number;
-
     /**
      * Specifies the model ID used to look up default token limits from the capability catalog.
      * When unset, the session's configured model ID is used.
diff --git a/python/copilot/client.py b/python/copilot/client.py
index 9d7a8eb11..5ea9e08f3 100644
--- a/python/copilot/client.py
+++ b/python/copilot/client.py
@@ -1966,8 +1966,6 @@ def _convert_provider_to_wire_format(
             wire_provider["maxOutputTokens"] = provider["max_output_tokens"]
         if "max_prompt_tokens" in provider:
             wire_provider["maxPromptTokens"] = provider["max_prompt_tokens"]
-        if "max_context_window_tokens" in provider:
-            wire_provider["maxContextWindowTokens"] = provider["max_context_window_tokens"]
         if "model_limits_id" in provider:
             wire_provider["modelLimitsId"] = provider["model_limits_id"]
         return wire_provider
diff --git a/python/copilot/session.py b/python/copilot/session.py
index 1e1b056af..bc22b3aac 100644
--- a/python/copilot/session.py
+++ b/python/copilot/session.py
@@ -507,15 +507,15 @@ class ProviderConfig(TypedDict, total=False):
     # Takes precedence over api_key when both are set.
     bearer_token: str
     azure: AzureProviderOptions  # Azure-specific options
-    # Overrides the maximum number of output tokens the model can generate.
-    # Takes precedence over the default limit from the model's capability catalog entry.
+    # Maximum number of tokens the model can generate in a single response.
+    # Sent as max_tokens per LLM API request. When hit, the model stops
+    # generating and returns a truncated response.
     max_output_tokens: int
-    # Overrides the maximum number of prompt/input tokens.
-    # Takes precedence over the default limit from the model's capability catalog entry.
+    # Maximum number of tokens allowed in the prompt for a single LLM API request.
+    # Used by the runtime to trigger conversation compaction before sending a
+    # request when the prompt (system message, history, tool definitions, user
+    # message) exceeds this limit.
     max_prompt_tokens: int
-    # Overrides the maximum context window size in tokens.
-    # Takes precedence over the default limit from the model's capability catalog entry.
-    max_context_window_tokens: int
     # Model ID used to look up default token limits from the capability catalog.
     # When unset, the session's configured model ID is used.
     # Useful for fine-tuned models that share the same limits as a base model.

From 47d07813dbeaaa11764c392751568ecdc5922e95 Mon Sep 17 00:00:00 2001
From: Mackinnon Buck <mackinnon.buck@gmail.com>
Date: Tue, 31 Mar 2026 13:22:24 -0700
Subject: [PATCH 5/5] Remove max_tokens implementation detail from doc comments

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 dotnet/src/Types.cs       | 3 +--
 go/types.go               | 3 +--
 nodejs/src/types.ts       | 1 -
 python/copilot/session.py | 3 +--
 4 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/dotnet/src/Types.cs b/dotnet/src/Types.cs
index b2c0096dc..42e0910f4 100644
--- a/dotnet/src/Types.cs
+++ b/dotnet/src/Types.cs
@@ -1118,8 +1118,7 @@ public class ProviderConfig
 
     /// <summary>
     /// Maximum number of tokens the model can generate in a single response.
-    /// Sent as <c>max_tokens</c> per LLM API request. When hit, the model stops
-    /// generating and returns a truncated response.
+    /// When hit, the model stops generating and returns a truncated response.
     /// </summary>
     [JsonPropertyName("maxOutputTokens")]
     public int? MaxOutputTokens { get; set; }
diff --git a/go/types.go b/go/types.go
index 0687ca5d9..1c3bd004b 100644
--- a/go/types.go
+++ b/go/types.go
@@ -602,8 +602,7 @@ type ProviderConfig struct {
 	// Azure contains Azure-specific options
 	Azure *AzureProviderOptions `json:"azure,omitempty"`
 	// MaxOutputTokens is the maximum number of tokens the model can generate in a single response.
-	// Sent as max_tokens per LLM API request. When hit, the model stops generating and returns
-	// a truncated response.
+	// When hit, the model stops generating and returns a truncated response.
 	MaxOutputTokens int `json:"maxOutputTokens,omitempty"`
 	// MaxPromptTokens is the maximum number of tokens allowed in the prompt for a single LLM API
 	// request. Used by the runtime to trigger conversation compaction before sending a request
diff --git a/nodejs/src/types.ts b/nodejs/src/types.ts
index 03d3d05a8..9c40fd38d 100644
--- a/nodejs/src/types.ts
+++ b/nodejs/src/types.ts
@@ -1263,7 +1263,6 @@ export interface ProviderConfig {
 
     /**
      * Maximum number of tokens the model can generate in a single response.
-     * Sent as {@link https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens max_tokens} per LLM API request.
      * When hit, the model stops generating and returns a truncated response.
      */
     maxOutputTokens?: number;
diff --git a/python/copilot/session.py b/python/copilot/session.py
index bc22b3aac..10e331b9f 100644
--- a/python/copilot/session.py
+++ b/python/copilot/session.py
@@ -508,8 +508,7 @@ class ProviderConfig(TypedDict, total=False):
     bearer_token: str
     azure: AzureProviderOptions  # Azure-specific options
     # Maximum number of tokens the model can generate in a single response.
-    # Sent as max_tokens per LLM API request. When hit, the model stops
-    # generating and returns a truncated response.
+    # When hit, the model stops generating and returns a truncated response.
     max_output_tokens: int
     # Maximum number of tokens allowed in the prompt for a single LLM API request.
     # Used by the runtime to trigger conversation compaction before sending a