diff --git a/.opencode/prompts/core/openagent/README.md b/.opencode/prompts/core/openagent/README.md index c8050c39..8ec6c4ca 100644 --- a/.opencode/prompts/core/openagent/README.md +++ b/.opencode/prompts/core/openagent/README.md @@ -193,8 +193,9 @@ open ../results/index.html ### `minimax.md` - MiniMax Optimized **Target Models:** -- `minimax/MiniMax-M2.7` (latest, 1M context window) -- `minimax/MiniMax-M2.7-highspeed` (same performance, faster) +- `minimax/MiniMax-M3` (latest flagship, default — 512K context, 128K max output, image input) +- `minimax/MiniMax-M2.7` (previous generation, 1M context window) +- `minimax/MiniMax-M2.7-highspeed` (previous generation, faster) **Optimizations:** - Structured with clear sections and explicit instructions @@ -214,12 +215,12 @@ open ../results/index.html **Known Issues:** None documented yet -**Use When:** Using MiniMax models (M2.7 family) +**Use When:** Using MiniMax models (M3 recommended, M2.7 family for legacy) **Setup:** 1. Get your API key from [MiniMax Platform](https://platform.minimax.io) 2. Set `MINIMAX_API_KEY` in your environment -3. Configure model: `minimax/MiniMax-M2.7` or `minimax/MiniMax-M2.7-highspeed` +3. 
Configure model: `minimax/MiniMax-M3` (recommended), `minimax/MiniMax-M2.7`, or `minimax/MiniMax-M2.7-highspeed` --- diff --git a/.opencode/prompts/core/openagent/minimax.md b/.opencode/prompts/core/openagent/minimax.md index b83d0177..3bf2b001 100644 --- a/.opencode/prompts/core/openagent/minimax.md +++ b/.opencode/prompts/core/openagent/minimax.md @@ -28,8 +28,9 @@ permissions: # Prompt Metadata model_family: "minimax" recommended_models: - - "minimax/MiniMax-M2.7" # Latest, peak performance (1M context) - - "minimax/MiniMax-M2.7-highspeed" # Same performance, faster and more agile + - "minimax/MiniMax-M3" # Latest flagship, default (512K context, 128K max output) + - "minimax/MiniMax-M2.7" # Previous generation (1M context) + - "minimax/MiniMax-M2.7-highspeed" # Previous generation, faster and more agile tested_with: null last_tested: null maintainer: "community" diff --git a/evals/framework/src/sdk/__tests__/minimax-integration.test.ts b/evals/framework/src/sdk/__tests__/minimax-integration.test.ts index e59a105d..5002513b 100644 --- a/evals/framework/src/sdk/__tests__/minimax-integration.test.ts +++ b/evals/framework/src/sdk/__tests__/minimax-integration.test.ts @@ -30,6 +30,12 @@ describe('MiniMax Integration', () => { }); describe('model behavior integration', () => { + it('should resolve MiniMax-M3 via provider prefix', () => { + const behavior = getModelBehavior('minimax/MiniMax-M3'); + expect(behavior).not.toBe(MODEL_BEHAVIORS['default']); + expect(behavior.typicalResponseTime).toBe(8000); + }); + it('should resolve MiniMax-M2.7 via provider prefix', () => { const behavior = getModelBehavior('minimax/MiniMax-M2.7'); expect(behavior).not.toBe(MODEL_BEHAVIORS['default']); @@ -43,10 +49,13 @@ describe('MiniMax Integration', () => { }); it('should calculate appropriate timeouts for eval tests', () => { + const m3Timeout = calculateModelTimeout(30000, 'minimax/MiniMax-M3'); const standardTimeout = calculateModelTimeout(30000, 
'minimax/MiniMax-M2.7'); const highspeedTimeout = calculateModelTimeout(30000, 'minimax/MiniMax-M2.7-highspeed'); - // Both should be reasonable for eval tests + // All should be reasonable for eval tests + expect(m3Timeout).toBeGreaterThanOrEqual(24000); + expect(m3Timeout).toBeLessThanOrEqual(120000); expect(standardTimeout).toBeGreaterThanOrEqual(24000); expect(standardTimeout).toBeLessThanOrEqual(120000); expect(highspeedTimeout).toBeGreaterThanOrEqual(15000); diff --git a/evals/framework/src/sdk/__tests__/minimax-model-behaviors.test.ts b/evals/framework/src/sdk/__tests__/minimax-model-behaviors.test.ts index 37c46ddc..4d214d21 100644 --- a/evals/framework/src/sdk/__tests__/minimax-model-behaviors.test.ts +++ b/evals/framework/src/sdk/__tests__/minimax-model-behaviors.test.ts @@ -10,6 +10,10 @@ import { MODEL_BEHAVIORS, getModelBehavior, calculateModelTimeout } from '../mod describe('MiniMax Model Behaviors', () => { describe('MODEL_BEHAVIORS registry', () => { + it('should include MiniMax-M3 entry', () => { + expect(MODEL_BEHAVIORS['MiniMax-M3']).toBeDefined(); + }); + it('should include MiniMax-M2.7 entry', () => { expect(MODEL_BEHAVIORS['MiniMax-M2.7']).toBeDefined(); }); @@ -18,6 +22,14 @@ describe('MiniMax Model Behaviors', () => { expect(MODEL_BEHAVIORS['MiniMax-M2.7-highspeed']).toBeDefined(); }); + it('should have correct properties for MiniMax-M3', () => { + const behavior = MODEL_BEHAVIORS['MiniMax-M3']; + expect(behavior.sendsCompletionText).toBe(true); + expect(behavior.mayEndWithToolCalls).toBe(false); + expect(behavior.typicalResponseTime).toBe(8000); + expect(behavior.toolCompletionGrace).toBe(4000); + }); + it('should have correct properties for MiniMax-M2.7', () => { const behavior = MODEL_BEHAVIORS['MiniMax-M2.7']; expect(behavior.sendsCompletionText).toBe(true); @@ -42,6 +54,11 @@ describe('MiniMax Model Behaviors', () => { }); describe('getModelBehavior()', () => { + it('should return exact match for MiniMax-M3', () => { + const behavior = 
getModelBehavior('MiniMax-M3'); + expect(behavior).toBe(MODEL_BEHAVIORS['MiniMax-M3']); + }); + it('should return exact match for MiniMax-M2.7', () => { const behavior = getModelBehavior('MiniMax-M2.7'); expect(behavior).toBe(MODEL_BEHAVIORS['MiniMax-M2.7']); @@ -52,6 +69,11 @@ describe('MiniMax Model Behaviors', () => { expect(behavior).toBe(MODEL_BEHAVIORS['MiniMax-M2.7-highspeed']); }); + it('should return partial match for minimax/MiniMax-M3', () => { + const behavior = getModelBehavior('minimax/MiniMax-M3'); + expect(behavior.typicalResponseTime).toBe(8000); + }); + it('should return partial match for minimax/MiniMax-M2.7', () => { const behavior = getModelBehavior('minimax/MiniMax-M2.7'); expect(behavior.typicalResponseTime).toBe(8000); @@ -64,6 +86,11 @@ describe('MiniMax Model Behaviors', () => { }); describe('calculateModelTimeout()', () => { + it('should calculate timeout for MiniMax-M3', () => { + const timeout = calculateModelTimeout(30000, 'MiniMax-M3'); + expect(timeout).toBeGreaterThanOrEqual(24000); // At least 3x typicalResponseTime + }); + it('should calculate timeout for MiniMax-M2.7', () => { const timeout = calculateModelTimeout(30000, 'MiniMax-M2.7'); expect(timeout).toBeGreaterThanOrEqual(24000); // At least 3x typicalResponseTime diff --git a/evals/framework/src/sdk/__tests__/minimax-prompt-variant.test.ts b/evals/framework/src/sdk/__tests__/minimax-prompt-variant.test.ts index e88a1e29..f0241452 100644 --- a/evals/framework/src/sdk/__tests__/minimax-prompt-variant.test.ts +++ b/evals/framework/src/sdk/__tests__/minimax-prompt-variant.test.ts @@ -29,11 +29,15 @@ describe('MiniMax Prompt Variant', () => { expect(content).toContain('model_family: "minimax"'); }); - it('should recommend MiniMax-M2.7 as primary model', () => { + it('should recommend MiniMax-M3 as primary model', () => { + expect(content).toContain('minimax/MiniMax-M3'); + }); + + it('should retain MiniMax-M2.7 as legacy alternative', () => { 
expect(content).toContain('minimax/MiniMax-M2.7'); }); - it('should recommend MiniMax-M2.7-highspeed as alternative', () => { + it('should retain MiniMax-M2.7-highspeed as legacy alternative', () => { expect(content).toContain('minimax/MiniMax-M2.7-highspeed'); }); diff --git a/evals/framework/src/sdk/model-behaviors.ts b/evals/framework/src/sdk/model-behaviors.ts index 9e90301b..c62ac145 100644 --- a/evals/framework/src/sdk/model-behaviors.ts +++ b/evals/framework/src/sdk/model-behaviors.ts @@ -73,6 +73,12 @@ export const MODEL_BEHAVIORS: Record = { typicalResponseTime: 6000, toolCompletionGrace: 3000, }, + 'MiniMax-M3': { + sendsCompletionText: true, + mayEndWithToolCalls: false, + typicalResponseTime: 8000, + toolCompletionGrace: 4000, + }, 'MiniMax-M2.7-highspeed': { sendsCompletionText: true, mayEndWithToolCalls: false,