From 1f9be44dca1a4d611d3283628d18e5da1e9f42ee Mon Sep 17 00:00:00 2001 From: Mackinnon Buck Date: Tue, 31 Mar 2026 10:34:35 -0700 Subject: [PATCH 1/5] Add token limit fields to ProviderConfig across all SDKs Add maxOutputTokens, maxPromptTokens, maxContextWindowTokens, and modelLimitsId to ProviderConfig in Node.js, Python, .NET, and Go SDKs. These optional fields allow BYOK users to configure token limits for custom providers, matching the runtime's ProviderConfig (PR #5311). Also update the Python wire format conversion to map the new snake_case fields to camelCase for the JSON-RPC wire protocol. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dotnet/src/Types.cs | 29 +++++++++++++++++++++++++++++ go/types.go | 13 +++++++++++++ nodejs/src/types.ts | 25 +++++++++++++++++++++++++ python/copilot/client.py | 10 ++++++++++ python/copilot/session.py | 13 +++++++++++++ 5 files changed, 90 insertions(+) diff --git a/dotnet/src/Types.cs b/dotnet/src/Types.cs index d6530f9c7..b6ab440c9 100644 --- a/dotnet/src/Types.cs +++ b/dotnet/src/Types.cs @@ -1115,6 +1115,35 @@ public class ProviderConfig /// [JsonPropertyName("azure")] public AzureOptions? Azure { get; set; } + + /// + /// Overrides the maximum number of output tokens the model can generate. + /// When set, takes precedence over the default limit resolved from the model's capability catalog entry. + /// + [JsonPropertyName("maxOutputTokens")] + public int? MaxOutputTokens { get; set; } + + /// + /// Overrides the maximum number of prompt/input tokens. + /// When set, takes precedence over the default limit resolved from the model's capability catalog entry. + /// + [JsonPropertyName("maxPromptTokens")] + public int? MaxPromptTokens { get; set; } + + /// + /// Overrides the maximum context window size in tokens. + /// When set, takes precedence over the default limit resolved from the model's capability catalog entry. + /// + [JsonPropertyName("maxContextWindowTokens")] + public int? MaxContextWindowTokens { get; set; } + + /// + /// Specifies the model ID used to look up default token limits from the capability catalog. + /// When unset, the session's configured model ID (see ) is used. + /// This is useful for fine-tuned models that share the same limits as a base model. + /// + [JsonPropertyName("modelLimitsId")] + public string? ModelLimitsId { get; set; } } /// diff --git a/go/types.go b/go/types.go index f888c9b6e..cbe0d53dc 100644 --- a/go/types.go +++ b/go/types.go @@ -601,6 +601,19 @@ type ProviderConfig struct { BearerToken string `json:"bearerToken,omitempty"` // Azure contains Azure-specific options Azure *AzureProviderOptions `json:"azure,omitempty"` + // MaxOutputTokens overrides the maximum number of output tokens the model can generate. + // When set, takes precedence over the default limit from the model's capability catalog entry. + MaxOutputTokens int `json:"maxOutputTokens,omitempty"` + // MaxPromptTokens overrides the maximum number of prompt/input tokens. + // When set, takes precedence over the default limit from the model's capability catalog entry. + MaxPromptTokens int `json:"maxPromptTokens,omitempty"` + // MaxContextWindowTokens overrides the maximum context window size in tokens. + // When set, takes precedence over the default limit from the model's capability catalog entry. + MaxContextWindowTokens int `json:"maxContextWindowTokens,omitempty"` + // ModelLimitsId specifies the model ID used to look up default token limits from the capability catalog. + // When unset, the session's configured model ID is used. + // Useful for fine-tuned models that share the same limits as a base model. + ModelLimitsId string `json:"modelLimitsId,omitempty"` } // AzureProviderOptions contains Azure-specific provider configuration diff --git a/nodejs/src/types.ts b/nodejs/src/types.ts index b4b9e563c..4de9dfb83 100644 --- a/nodejs/src/types.ts +++ b/nodejs/src/types.ts @@ -1260,6 +1260,31 @@ export interface ProviderConfig { */ apiVersion?: string; }; + + /** + * Overrides the maximum number of output tokens the model can generate. + * When set, takes precedence over the default limit resolved from the model's capability catalog entry. + */ + maxOutputTokens?: number; + + /** + * Overrides the maximum number of prompt/input tokens. + * When set, takes precedence over the default limit resolved from the model's capability catalog entry. + */ + maxPromptTokens?: number; + + /** + * Overrides the maximum context window size in tokens. + * When set, takes precedence over the default limit resolved from the model's capability catalog entry. + */ + maxContextWindowTokens?: number; + + /** + * Specifies the model ID used to look up default token limits from the capability catalog. + * When unset, the session's configured model ID is used. + * This is useful for fine-tuned models that share the same limits as a base model. + */ + modelLimitsId?: string; } /** diff --git a/python/copilot/client.py b/python/copilot/client.py index ab8074756..64cb567c8 100644 --- a/python/copilot/client.py +++ b/python/copilot/client.py @@ -1962,6 +1962,16 @@ def _convert_provider_to_wire_format( wire_azure["apiVersion"] = azure["api_version"] if wire_azure: wire_provider["azure"] = wire_azure + if "max_output_tokens" in provider: + wire_provider["maxOutputTokens"] = provider["max_output_tokens"] + if "max_prompt_tokens" in provider: + wire_provider["maxPromptTokens"] = provider["max_prompt_tokens"] + if "max_context_window_tokens" in provider: + wire_provider["maxContextWindowTokens"] = provider[ + "max_context_window_tokens" + ] + if "model_limits_id" in provider: + wire_provider["modelLimitsId"] = provider["model_limits_id"] return wire_provider def _convert_custom_agent_to_wire_format( diff --git a/python/copilot/session.py b/python/copilot/session.py index 019436f7a..1e1b056af 100644 --- a/python/copilot/session.py +++ b/python/copilot/session.py @@ -507,6 +507,19 @@ class ProviderConfig(TypedDict, total=False): # Takes precedence over api_key when both are set. bearer_token: str azure: AzureProviderOptions # Azure-specific options + # Overrides the maximum number of output tokens the model can generate. + # Takes precedence over the default limit from the model's capability catalog entry. + max_output_tokens: int + # Overrides the maximum number of prompt/input tokens. + # Takes precedence over the default limit from the model's capability catalog entry. + max_prompt_tokens: int + # Overrides the maximum context window size in tokens. + # Takes precedence over the default limit from the model's capability catalog entry. + max_context_window_tokens: int + # Model ID used to look up default token limits from the capability catalog. + # When unset, the session's configured model ID is used. + # Useful for fine-tuned models that share the same limits as a base model. + model_limits_id: str class SessionConfig(TypedDict, total=False): From 3c92cedbb44a58872187933d5109627ce9ae3b52 Mon Sep 17 00:00:00 2001 From: Mackinnon Buck Date: Tue, 31 Mar 2026 10:55:47 -0700 Subject: [PATCH 2/5] Address PR review: rename ModelLimitsId to ModelLimitsID in Go SDK Fixes Go naming to follow initialism convention (ID not Id), consistent with existing fields like APIKey, BaseURL, and APIVersion. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- go/types.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/types.go b/go/types.go index cbe0d53dc..7246d29db 100644 --- a/go/types.go +++ b/go/types.go @@ -610,10 +610,10 @@ type ProviderConfig struct { // MaxContextWindowTokens overrides the maximum context window size in tokens. // When set, takes precedence over the default limit from the model's capability catalog entry. MaxContextWindowTokens int `json:"maxContextWindowTokens,omitempty"` - // ModelLimitsId specifies the model ID used to look up default token limits from the capability catalog. + // ModelLimitsID specifies the model ID used to look up default token limits from the capability catalog. // When unset, the session's configured model ID is used. // Useful for fine-tuned models that share the same limits as a base model. - ModelLimitsId string `json:"modelLimitsId,omitempty"` + ModelLimitsID string `json:"modelLimitsId,omitempty"` } // AzureProviderOptions contains Azure-specific provider configuration From 7bafc5fa8247582aaab896d9ac12bb880531e0a9 Mon Sep 17 00:00:00 2001 From: Mackinnon Buck Date: Tue, 31 Mar 2026 11:18:13 -0700 Subject: [PATCH 3/5] Fix CI: ruff format issue in Python client.py Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/copilot/client.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/copilot/client.py b/python/copilot/client.py index 64cb567c8..9d7a8eb11 100644 --- a/python/copilot/client.py +++ b/python/copilot/client.py @@ -1967,9 +1967,7 @@ def _convert_provider_to_wire_format( if "max_prompt_tokens" in provider: wire_provider["maxPromptTokens"] = provider["max_prompt_tokens"] if "max_context_window_tokens" in provider: - wire_provider["maxContextWindowTokens"] = provider[ - "max_context_window_tokens" - ] + wire_provider["maxContextWindowTokens"] = provider["max_context_window_tokens"] if "model_limits_id" in provider: wire_provider["modelLimitsId"] = provider["model_limits_id"] return wire_provider From d0d283c903db76cf03d6c7853871dc31f3d8dfeb Mon Sep 17 00:00:00 2001 From: Mackinnon Buck Date: Tue, 31 Mar 2026 13:11:11 -0700 Subject: [PATCH 4/5] Remove maxContextWindowTokens and refine token limit docs Remove maxContextWindowTokens from all SDKs - it is an internal runtime fallback that should not be exposed as public SDK API. Refine doc comments for maxOutputTokens and maxPromptTokens to explain what happens when each limit is hit: - maxOutputTokens: sent as max_tokens per LLM request; model stops generating and returns a truncated response when hit. - maxPromptTokens: used by the runtime to trigger conversation compaction before sending a request when the prompt exceeds this limit. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dotnet/src/Types.cs | 17 ++++++----------- go/types.go | 13 ++++++------- nodejs/src/types.ts | 16 ++++++---------- python/copilot/client.py | 2 -- python/copilot/session.py | 14 +++++++------- 5 files changed, 25 insertions(+), 37 deletions(-) diff --git a/dotnet/src/Types.cs b/dotnet/src/Types.cs index b6ab440c9..b2c0096dc 100644 --- a/dotnet/src/Types.cs +++ b/dotnet/src/Types.cs @@ -1117,26 +1117,21 @@ public class ProviderConfig public AzureOptions? Azure { get; set; } /// - /// Overrides the maximum number of output tokens the model can generate. - /// When set, takes precedence over the default limit resolved from the model's capability catalog entry. + /// Maximum number of tokens the model can generate in a single response. + /// Sent as max_tokens per LLM API request. When hit, the model stops + /// generating and returns a truncated response. /// [JsonPropertyName("maxOutputTokens")] public int? MaxOutputTokens { get; set; } /// - /// Overrides the maximum number of prompt/input tokens. - /// When set, takes precedence over the default limit resolved from the model's capability catalog entry. + /// Maximum number of tokens allowed in the prompt for a single LLM API request. + /// Used by the runtime to trigger conversation compaction before sending a request + /// when the prompt (system message, history, tool definitions, user message) exceeds this limit. /// [JsonPropertyName("maxPromptTokens")] public int? MaxPromptTokens { get; set; } - /// - /// Overrides the maximum context window size in tokens. - /// When set, takes precedence over the default limit resolved from the model's capability catalog entry. - /// - [JsonPropertyName("maxContextWindowTokens")] - public int? MaxContextWindowTokens { get; set; } - /// /// Specifies the model ID used to look up default token limits from the capability catalog. /// When unset, the session's configured model ID (see ) is used. diff --git a/go/types.go b/go/types.go index 7246d29db..0687ca5d9 100644 --- a/go/types.go +++ b/go/types.go @@ -601,15 +601,14 @@ type ProviderConfig struct { BearerToken string `json:"bearerToken,omitempty"` // Azure contains Azure-specific options Azure *AzureProviderOptions `json:"azure,omitempty"` - // MaxOutputTokens overrides the maximum number of output tokens the model can generate. - // When set, takes precedence over the default limit from the model's capability catalog entry. + // MaxOutputTokens is the maximum number of tokens the model can generate in a single response. + // Sent as max_tokens per LLM API request. When hit, the model stops generating and returns + // a truncated response. MaxOutputTokens int `json:"maxOutputTokens,omitempty"` - // MaxPromptTokens overrides the maximum number of prompt/input tokens. - // When set, takes precedence over the default limit from the model's capability catalog entry. + // MaxPromptTokens is the maximum number of tokens allowed in the prompt for a single LLM API + // request. Used by the runtime to trigger conversation compaction before sending a request + // when the prompt (system message, history, tool definitions, user message) exceeds this limit. MaxPromptTokens int `json:"maxPromptTokens,omitempty"` - // MaxContextWindowTokens overrides the maximum context window size in tokens. - // When set, takes precedence over the default limit from the model's capability catalog entry. - MaxContextWindowTokens int `json:"maxContextWindowTokens,omitempty"` // ModelLimitsID specifies the model ID used to look up default token limits from the capability catalog. // When unset, the session's configured model ID is used. // Useful for fine-tuned models that share the same limits as a base model. diff --git a/nodejs/src/types.ts b/nodejs/src/types.ts index 4de9dfb83..03d3d05a8 100644 --- a/nodejs/src/types.ts +++ b/nodejs/src/types.ts @@ -1262,23 +1262,19 @@ export interface ProviderConfig { }; /** - * Overrides the maximum number of output tokens the model can generate. - * When set, takes precedence over the default limit resolved from the model's capability catalog entry. + * Maximum number of tokens the model can generate in a single response. + * Sent as {@link https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens max_tokens} per LLM API request. + * When hit, the model stops generating and returns a truncated response. */ maxOutputTokens?: number; /** - * Overrides the maximum number of prompt/input tokens. - * When set, takes precedence over the default limit resolved from the model's capability catalog entry. + * Maximum number of tokens allowed in the prompt for a single LLM API request. + * Used by the runtime to trigger conversation compaction before sending a request + * when the prompt (system message, history, tool definitions, user message) exceeds this limit. */ maxPromptTokens?: number; - /** - * Overrides the maximum context window size in tokens. - * When set, takes precedence over the default limit resolved from the model's capability catalog entry. - */ - maxContextWindowTokens?: number; - /** * Specifies the model ID used to look up default token limits from the capability catalog. * When unset, the session's configured model ID is used. diff --git a/python/copilot/client.py b/python/copilot/client.py index 9d7a8eb11..5ea9e08f3 100644 --- a/python/copilot/client.py +++ b/python/copilot/client.py @@ -1966,8 +1966,6 @@ def _convert_provider_to_wire_format( wire_provider["maxOutputTokens"] = provider["max_output_tokens"] if "max_prompt_tokens" in provider: wire_provider["maxPromptTokens"] = provider["max_prompt_tokens"] - if "max_context_window_tokens" in provider: - wire_provider["maxContextWindowTokens"] = provider["max_context_window_tokens"] if "model_limits_id" in provider: wire_provider["modelLimitsId"] = provider["model_limits_id"] return wire_provider diff --git a/python/copilot/session.py b/python/copilot/session.py index 1e1b056af..bc22b3aac 100644 --- a/python/copilot/session.py +++ b/python/copilot/session.py @@ -507,15 +507,15 @@ class ProviderConfig(TypedDict, total=False): # Takes precedence over api_key when both are set. bearer_token: str azure: AzureProviderOptions # Azure-specific options - # Overrides the maximum number of output tokens the model can generate. - # Takes precedence over the default limit from the model's capability catalog entry. + # Maximum number of tokens the model can generate in a single response. + # Sent as max_tokens per LLM API request. When hit, the model stops + # generating and returns a truncated response. max_output_tokens: int - # Overrides the maximum number of prompt/input tokens. - # Takes precedence over the default limit from the model's capability catalog entry. + # Maximum number of tokens allowed in the prompt for a single LLM API request. + # Used by the runtime to trigger conversation compaction before sending a + # request when the prompt (system message, history, tool definitions, user + # message) exceeds this limit. max_prompt_tokens: int - # Overrides the maximum context window size in tokens. - # Takes precedence over the default limit from the model's capability catalog entry. - max_context_window_tokens: int # Model ID used to look up default token limits from the capability catalog. # When unset, the session's configured model ID is used. # Useful for fine-tuned models that share the same limits as a base model. From 47d07813dbeaaa11764c392751568ecdc5922e95 Mon Sep 17 00:00:00 2001 From: Mackinnon Buck Date: Tue, 31 Mar 2026 13:22:24 -0700 Subject: [PATCH 5/5] Remove max_tokens implementation detail from doc comments Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dotnet/src/Types.cs | 3 +-- go/types.go | 3 +-- nodejs/src/types.ts | 1 - python/copilot/session.py | 3 +-- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/dotnet/src/Types.cs b/dotnet/src/Types.cs index b2c0096dc..42e0910f4 100644 --- a/dotnet/src/Types.cs +++ b/dotnet/src/Types.cs @@ -1118,8 +1118,7 @@ public class ProviderConfig /// /// Maximum number of tokens the model can generate in a single response. - /// Sent as max_tokens per LLM API request. When hit, the model stops - /// generating and returns a truncated response. + /// When hit, the model stops generating and returns a truncated response. /// [JsonPropertyName("maxOutputTokens")] public int? MaxOutputTokens { get; set; } diff --git a/go/types.go b/go/types.go index 0687ca5d9..1c3bd004b 100644 --- a/go/types.go +++ b/go/types.go @@ -602,8 +602,7 @@ type ProviderConfig struct { // Azure contains Azure-specific options Azure *AzureProviderOptions `json:"azure,omitempty"` // MaxOutputTokens is the maximum number of tokens the model can generate in a single response. - // Sent as max_tokens per LLM API request. When hit, the model stops generating and returns - // a truncated response. + // When hit, the model stops generating and returns a truncated response. MaxOutputTokens int `json:"maxOutputTokens,omitempty"` // MaxPromptTokens is the maximum number of tokens allowed in the prompt for a single LLM API // request. Used by the runtime to trigger conversation compaction before sending a request diff --git a/nodejs/src/types.ts b/nodejs/src/types.ts index 03d3d05a8..9c40fd38d 100644 --- a/nodejs/src/types.ts +++ b/nodejs/src/types.ts @@ -1263,7 +1263,6 @@ export interface ProviderConfig { /** * Maximum number of tokens the model can generate in a single response. - * Sent as {@link https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens max_tokens} per LLM API request. * When hit, the model stops generating and returns a truncated response. */ maxOutputTokens?: number; diff --git a/python/copilot/session.py b/python/copilot/session.py index bc22b3aac..10e331b9f 100644 --- a/python/copilot/session.py +++ b/python/copilot/session.py @@ -508,8 +508,7 @@ class ProviderConfig(TypedDict, total=False): bearer_token: str azure: AzureProviderOptions # Azure-specific options # Maximum number of tokens the model can generate in a single response. - # Sent as max_tokens per LLM API request. When hit, the model stops - # generating and returns a truncated response. + # When hit, the model stops generating and returns a truncated response. max_output_tokens: int # Maximum number of tokens allowed in the prompt for a single LLM API request. # Used by the runtime to trigger conversation compaction before sending a