diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a42442..912875a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## Unreleased +- Updated managed GonkaGate `limit.context` values to match the current + deployed windows: `240000` for Kimi/Qwen and `180000` for MiniMax. - Wrote the validated GonkaGate chat-completions catalog into the managed provider config so Kilo's OpenCode-style `/models` picker can switch between Kimi K2.6, Qwen3 235B A22B Instruct 2507 FP8, and MiniMax M2.7. diff --git a/docs/gonkagate-x-kilo.md b/docs/gonkagate-x-kilo.md index 5121748..ad9db1d 100644 --- a/docs/gonkagate-x-kilo.md +++ b/docs/gonkagate-x-kilo.md @@ -135,7 +135,7 @@ Right now the public default is deliberately small: `qwen/qwen3-235b-a22b-instruct-2507-fp8`, and `minimaxai/minimax-m2.7` - managed limits: `limit.output = 8192` for all validated entries; - `limit.context = 262144` for Kimi/Qwen and `204800` for MiniMax + `limit.context = 240000` for Kimi/Qwen and `180000` for MiniMax We are treating model support as a curated list, not as a vague "it probably works" promise. diff --git a/docs/how-it-works.md b/docs/how-it-works.md index 4fac58a..c3d0724 100644 --- a/docs/how-it-works.md +++ b/docs/how-it-works.md @@ -30,7 +30,7 @@ Current public limit: `@kilocode/cli >=7.2.0`, `chat/completions`, and non-Windows production claims - the curated default is - `moonshotai/Kimi-K2.6` with `limit.context = 262144` and + `moonshotai/Kimi-K2.6` with `limit.context = 240000` and `limit.output = 8192` - the written provider config includes the validated chat-completions model catalog, currently `moonshotai/Kimi-K2.6`, diff --git a/docs/release-readiness.md b/docs/release-readiness.md index 030126d..21db173 100644 --- a/docs/release-readiness.md +++ b/docs/release-readiness.md @@ -24,8 +24,8 @@ docs, and tests for these facts: `qwen/qwen3-235b-a22b-instruct-2507-fp8`, and `minimaxai/minimax-m2.7` - curated model limits for the validated catalog: - `limit.output = 8192` for all validated entries; `limit.context = 262144` - for Kimi/Qwen and `204800` for MiniMax + `limit.output = 8192` for all validated entries; `limit.context = 240000` + for Kimi/Qwen and `180000` for MiniMax - managed secret path: `~/.gonkagate/kilo/api-key` - project scope stays secret-free and still depends on a compatible user-level `provider.gonkagate` definition on each machine @@ -34,24 +34,21 @@ docs, and tests for these facts: ## External Evidence Captured -The current default-model contract is backed by product direction plus public -Moonshot metadata checked on 2026-04-29: +The current model-limit contract is backed by product direction plus Gonka +deployment metadata checked on 2026-06-23: -- the Kimi K2.6 model card lists `moonshotai/Kimi-K2.6` and documents a 256K - context window +- Gonka deployment model args list Kimi K2.6 and Qwen3 235B A22B Instruct 2507 + FP8 with `--max-model-len 240000` +- Gonka deployment model args list `minimaxai/minimax-m2.7` with + `--max-model-len 180000` - the model card documents OpenAI-compatible chat completions access through Moonshot's API - the API docs describe the supported request path as `POST /v1/chat/completions` - the package writes `limit.output = 8192` for validated catalog entries as the installer-managed Kilo compatibility clamp because Kilo `7.2.0` requires a numeric output limit in custom model config -- the April 14, 2026 Kilo compatibility spike captured - `qwen/qwen3-235b-a22b-instruct-2507-fp8` as a validated - chat-completions model with a 262K context window and `maxTokens` 8192 -- NVIDIA NIM metadata checked on 2026-05-29 lists - `minimaxai/minimax-m2.7` as MiniMax M2.7 with a 204,800 input context - length; the package keeps the same installer-managed `limit.output = 8192` - compatibility clamp for this catalog entry +- the package keeps the same installer-managed `limit.output = 8192` + compatibility clamp for every validated catalog entry - npm registry metadata checked on 2026-04-29 showed `@kilocode/cli` patch releases in the `7.2.x` line, including `7.2.14`, with both `kilo` and `kilocode` binaries still exposed by the wrapper package diff --git a/docs/specs/kilo-setup-prd/compatibility-spike-notes.md b/docs/specs/kilo-setup-prd/compatibility-spike-notes.md index cf1685c..4abbb69 100644 --- a/docs/specs/kilo-setup-prd/compatibility-spike-notes.md +++ b/docs/specs/kilo-setup-prd/compatibility-spike-notes.md @@ -23,7 +23,7 @@ not contain raw `kilo debug config` output or real secrets. - example model id: `qwen/qwen3-235b-a22b-instruct-2507-fp8` - GonkaGate public model page checked: - - `qwen/qwen3-235b-a22b-instruct-2507-fp8` context window: `262K` + - `qwen/qwen3-235b-a22b-instruct-2507-fp8` context window: `240000` - GonkaGate OpenClaw custom-provider guidance documents `maxTokens: 8192` for the same model. diff --git a/docs/specs/kilo-setup-prd/spec.md b/docs/specs/kilo-setup-prd/spec.md index c96c18a..524dd60 100644 --- a/docs/specs/kilo-setup-prd/spec.md +++ b/docs/specs/kilo-setup-prd/spec.md @@ -66,8 +66,8 @@ Redacted evidence notes are preserved in - A full `.../chat/completions` URL must not be written to any Kilo base URL field; Kilo appends `chat/completions` through the OpenAI-compatible adapter. - Production setup should leave `small_model` untouched by default. -- GonkaGate public sources prove the selected model's context window as `262K` - tokens, interpreted for Kilo as `262144`. +- GonkaGate deployment sources prove the selected model's context window as + `240000` tokens. - GonkaGate public OpenClaw guidance documents `maxTokens: 8192` for the selected model, which the installer writes as Kilo `limit.output`. - No safe `GONKAGATE_API_KEY` was present during the spike, so live GonkaGate @@ -538,7 +538,7 @@ Expected production managed provider shape: "name": "Qwen3 235B A22B Instruct 2507 FP8", "tool_call": true, "limit": { - "context": 262144, + "context": 240000, "output": "BLOCKED_UNTIL_GONKAGATE_LIMIT_IS_PROVEN", }, }, @@ -576,8 +576,8 @@ Evidence: - The same fake smoke proved that writing a full `.../chat/completions` URL produces a doubled `/chat/completions/chat/completions` path. -The `limit.context` value is proven from the GonkaGate public model page as -`262K`, mapped to the integer `262144`. GonkaGate OpenClaw guidance documents +The `limit.context` value is proven from the GonkaGate deployment metadata as +`240000`. GonkaGate OpenClaw guidance documents `maxTokens: 8192` for the same model, so the installer writes `limit.output = 8192`. Kilo docs state that `context: 0` disables compaction and context-size-dependent usage tracking, while `output: 0` falls back to Kilo's diff --git a/docs/user-guide.md b/docs/user-guide.md index 7eb553b..6336fae 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -30,7 +30,7 @@ Today the public package is intentionally narrow: `qwen/qwen3-235b-a22b-instruct-2507-fp8`, and `minimaxai/minimax-m2.7` - managed model limits: `limit.output = 8192` for all validated entries; - `limit.context = 262144` for Kimi/Qwen and `204800` for MiniMax + `limit.context = 240000` for Kimi/Qwen and `180000` for MiniMax - no native Windows production claim yet ## Before You Run It diff --git a/src/constants/models.ts b/src/constants/models.ts index 1ebf5c6..55dab3e 100644 --- a/src/constants/models.ts +++ b/src/constants/models.ts @@ -84,14 +84,14 @@ export const CURATED_MODEL_REGISTRY = Object.freeze({ adapterPackage: "@ai-sdk/openai-compatible", displayName: "Kimi K2.6", limits: { - context: 262144, + context: 240000, output: 8192, }, modelId: "moonshotai/Kimi-K2.6", recommended: true, runtimeCompatibility: { notes: [ - "Moonshot model metadata lists Kimi K2.6 with a 256K context window and OpenAI-compatible chat completions access.", + "Gonka deployment metadata lists Kimi K2.6 with a 240,000 token context window and OpenAI-compatible chat completions access.", "The installer writes limit.output = 8192 as a conservative Kilo 7.2.0 compatibility clamp for this default.", ], }, @@ -102,14 +102,14 @@ export const CURATED_MODEL_REGISTRY = Object.freeze({ adapterPackage: "@ai-sdk/openai-compatible", displayName: "Qwen3 235B A22B Instruct 2507 FP8", limits: { - context: 262144, + context: 240000, output: 8192, }, modelId: "qwen/qwen3-235b-a22b-instruct-2507-fp8", recommended: false, runtimeCompatibility: { notes: [ - "Official GonkaGate model metadata lists this model as available with a 262K context window.", + "Gonka deployment metadata lists this model as available with a 240,000 token context window.", "Official GonkaGate OpenClaw provider guidance lists this model with maxTokens 8192.", "Kilo 7.2.0 rejects custom model entries without a numeric limit.output value, so the installer writes limit.output = 8192 for this validated default.", ], @@ -121,14 +121,14 @@ export const CURATED_MODEL_REGISTRY = Object.freeze({ adapterPackage: "@ai-sdk/openai-compatible", displayName: "MiniMax M2.7", limits: { - context: 204800, + context: 180000, output: 8192, }, modelId: "minimaxai/minimax-m2.7", recommended: false, runtimeCompatibility: { notes: [ - "NVIDIA NIM metadata lists minimaxai/minimax-m2.7 as MiniMax M2.7 with a 204,800 input context length.", + "Gonka deployment metadata lists minimaxai/minimax-m2.7 as MiniMax M2.7 with a 180,000 token context window.", "The installer writes limit.output = 8192 as the existing conservative Kilo 7.2.0 compatibility clamp for validated catalog entries.", ], }, diff --git a/test/cli.test.ts b/test/cli.test.ts index f4f1d66..840018b 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -27,7 +27,7 @@ const VALIDATED_MODEL: RecommendedProductionDefaultCuratedModel = { displayName: "Kimi K2.6", key: MODEL_KEY, limits: { - context: 262144, + context: 240000, output: 8192, }, modelId: "moonshotai/Kimi-K2.6", diff --git a/test/install/models.test.ts b/test/install/models.test.ts index f1b26b5..fdbbab4 100644 --- a/test/install/models.test.ts +++ b/test/install/models.test.ts @@ -12,7 +12,7 @@ test("the default curated model is the shipped validated Kimi public default", ( assert.equal(model.adapterPackage, "@ai-sdk/openai-compatible"); assert.equal(model.validationStatus, "validated"); - assert.equal(model.limits?.context, 262144); + assert.equal(model.limits?.context, 240000); assert.equal(model.limits?.output, 8192); assert.deepEqual(getValidatedModelKeys(), [ "kimi-k2.6", diff --git a/test/install/test-model-catalog.ts b/test/install/test-model-catalog.ts index 10beeba..56fa01d 100644 --- a/test/install/test-model-catalog.ts +++ b/test/install/test-model-catalog.ts @@ -6,7 +6,7 @@ export const TEST_VALIDATED_MODEL = { displayName: "Kimi K2.6", key: "kimi-k2.6", limits: { - context: 262144, + context: 240000, output: 8192, }, modelId: "moonshotai/Kimi-K2.6", diff --git a/test/package-contract.test.ts b/test/package-contract.test.ts index bb0c39d..fc0ddc3 100644 --- a/test/package-contract.test.ts +++ b/test/package-contract.test.ts @@ -138,14 +138,14 @@ test("curated model registry exposes Kimi as the shipped validated default", () const minimax = CURATED_MODEL_REGISTRY["minimax-m2.7"]; assert.equal(kimi.adapterPackage, "@ai-sdk/openai-compatible"); - assert.deepEqual(kimi.limits, { context: 262144, output: 8192 }); + assert.deepEqual(kimi.limits, { context: 240000, output: 8192 }); assert.equal(kimi.modelId, "moonshotai/Kimi-K2.6"); assert.equal(kimi.recommended, true); assert.equal(kimi.transport, "chat_completions"); assert.equal(kimi.validationStatus, "validated"); assert.equal(qwen.adapterPackage, "@ai-sdk/openai-compatible"); - assert.equal(qwen.limits?.context, 262144); + assert.equal(qwen.limits?.context, 240000); assert.equal(qwen.limits?.output, 8192); assert.equal(qwen.modelId, "qwen/qwen3-235b-a22b-instruct-2507-fp8"); assert.equal(qwen.recommended, false); @@ -153,7 +153,7 @@ test("curated model registry exposes Kimi as the shipped validated default", () assert.equal(qwen.validationStatus, "validated"); assert.equal(minimax.adapterPackage, "@ai-sdk/openai-compatible"); - assert.equal(minimax.limits?.context, 204800); + assert.equal(minimax.limits?.context, 180000); assert.equal(minimax.limits?.output, 8192); assert.equal(minimax.modelId, "minimaxai/minimax-m2.7"); assert.equal(minimax.recommended, false); @@ -172,9 +172,9 @@ test("managed provider config includes the required Kilo model limits", () => { models: Record; }; - assert.equal(providerConfig.models["kimi-k2.6"]?.limit?.context, 262144); + assert.equal(providerConfig.models["kimi-k2.6"]?.limit?.context, 240000); assert.equal(providerConfig.models["kimi-k2.6"]?.limit?.output, 8192); - assert.equal(providerConfig.models["minimax-m2.7"]?.limit?.context, 204800); + assert.equal(providerConfig.models["minimax-m2.7"]?.limit?.context, 180000); assert.equal(providerConfig.models["minimax-m2.7"]?.limit?.output, 8192); });