From 25df00ca5f3a9e5718a856ec7f84e0cb2e01d5c7 Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 11:53:19 +0900 Subject: [PATCH 01/14] feat: prompt user for evolution via Stop-hook AskUserQuestion Stop hook scans party for pokemon ready to evolve (branch + single chain) and emits {decision:"block", reason} to force Claude to call AskUserQuestion instead of auto-evolving or silently staging a flag. User selects a target, Claude runs `tokenmon evolve `. Refuse sets evolution_prompt_shown, which holds the prompt until the user manually runs `/tkm evolve` to clear it. - evolution.ts: single-chain now uses the same flag-based flow as branch evolutions; state-missing callers keep the original return signature so existing tests remain green - stop.ts: post-XP scan emits block JSON first, then persists the prompt_shown flag under lock (duplicate prompt > silent loss on crash); lock failures are logged rather than swallowed - markEvolutionReady helper dedups the three single-chain paths in checkEvolution - notifications.ts + status-line.ts: skip the ready hint once the prompt has fired - test/e2e: harness verifies block JSON and flag persistence via an isolated CLAUDE_CONFIG_DIR; tmux full-session harness remains TODO --- src/cli/tokenmon.ts | 22 ++- src/core/evolution.ts | 105 ++++++++++++- src/core/notifications.ts | 1 + src/core/types.ts | 1 + src/hooks/stop.ts | 67 ++++++-- src/i18n/en.json | 2 + src/i18n/en.pokemon.json | 2 + src/i18n/ko.json | 2 + src/i18n/ko.pokemon.json | 2 + src/status-line.ts | 2 +- test/e2e/evolve-askuserquestion.test.ts | 201 ++++++++++++++++++++++++ 11 files changed, 387 insertions(+), 20 deletions(-) create mode 100644 test/e2e/evolve-askuserquestion.test.ts diff --git a/src/cli/tokenmon.ts b/src/cli/tokenmon.ts index 3801170e..d48bcce2 100644 --- a/src/cli/tokenmon.ts +++ b/src/cli/tokenmon.ts @@ -11,7 +11,7 @@ import { getCompletion, getPokedexList, syncPokedexFromUnlocked, getRegionSummar import { getBoxList } from 
'../core/box.js'; import { getCurrentRegion, getRegionList, moveToRegion } from '../core/regions.js'; import { renderGuide, renderGuideIndex } from '../core/guide.js'; -import { getEligibleBranches, applyBranchEvolution } from '../core/evolution.js'; +import { getEligibleBranches, applyBranchEvolution, applySingleChainEvolution } from '../core/evolution.js'; import { getActiveNotifications, dismissAll } from '../core/notifications.js'; import { getActiveEvents } from '../core/encounter.js'; import { getEventsDB, getRegionsDB, getPokedexRewardsDB } from '../core/pokemon-data.js'; @@ -1058,8 +1058,26 @@ function executeEvolve(pokemonName: string, targetName: string, _config: unknown const evolveResult = withLock(() => { const freshState = readState(); const freshConfig = readConfig(); - const result = applyBranchEvolution(freshState, freshConfig, pokemonName, targetName); + const db = getPokemonDB(); + const data = db.pokemon[toBaseId(pokemonName)]; + + let result = null; + if (data && Array.isArray(data.evolves_to)) { + // Branch evolution (e.g., Kirlia -> Gardevoir/Gallade) + result = applyBranchEvolution(freshState, freshConfig, pokemonName, targetName); + } else { + // Single-chain evolution (e.g., Turtwig -> Grotle) + result = applySingleChainEvolution(freshState, freshConfig, pokemonName, targetName); + } + if (!result) return { ok: false as const }; + + // Clear evolution_prompt_shown on the new pokemon key (if carried over) + const newKey = isShinyKey(pokemonName) ? 
toShinyKey(result.newPokemon) : result.newPokemon; + if (freshState.pokemon[newKey]) { + freshState.pokemon[newKey].evolution_prompt_shown = undefined; + } + writeState(freshState); writeConfig(freshConfig); return { ok: true as const, result }; diff --git a/src/core/evolution.ts b/src/core/evolution.ts index 9cef0e33..a43c02ec 100644 --- a/src/core/evolution.ts +++ b/src/core/evolution.ts @@ -1,6 +1,6 @@ import { getPokemonDB, parseCrossGenRef, ensurePokemonInDB } from './pokemon-data.js'; import { isShinyKey, toBaseId, toShinyKey } from './shiny-utils.js'; -import type { State, Config, EvolutionResult, EvolutionContext, BranchEvolution } from './types.js'; +import type { State, Config, EvolutionResult, EvolutionContext, BranchEvolution, PokemonState } from './types.js'; const FRIENDSHIP_THRESHOLD = 220; @@ -10,6 +10,19 @@ export interface BranchInfo { conditionLabel: string; } +/** + * Mark a pokemon ready for evolution prompt. Returns true if already prompted + * (caller should return null). Used by all three single-chain paths in checkEvolution. + */ +function markEvolutionReady(pState: PokemonState, target: string): boolean { + if (pState.evolution_prompt_shown) return true; + if (!pState.evolution_ready) { + pState.evolution_ready = true; + pState.evolution_options = [target]; + } + return false; +} + /** * Check if a pokemon should evolve given the current context. * Supports: level, friendship, trade (achievement proxy), item, region. 
@@ -40,7 +53,7 @@ export function checkEvolution( }); const conditionMet = filtered.filter(b => b.conditionMet); const pState = state.pokemon[pokemonName]; - if (pState) { + if (pState && !pState.evolution_prompt_shown) { if (conditionMet.length > 0 && !pState.evolution_ready) { pState.evolution_ready = true; pState.evolution_options = conditionMet.map(b => b.name); @@ -82,6 +95,14 @@ export function checkEvolution( } else { return null; } + + // Flag-based flow when state is provided (mirrors branch evolution pattern) + if (state) { + const pState = state.pokemon[pokemonName]; + if (pState) markEvolutionReady(pState, targetName); + return null; + } + return { oldPokemon: pokemonName, newPokemon: targetName, newId: targetData.id, level: context.newLevel }; } @@ -105,6 +126,13 @@ export function checkEvolution( const triggered = checkCondition(condition, context); if (!triggered) return null; + // Flag-based flow when state is provided + if (state) { + const pState = state.pokemon[pokemonName]; + if (pState) markEvolutionReady(pState, nextPokemon); + return null; + } + return { oldPokemon: pokemonName, newPokemon: nextPokemon, @@ -116,6 +144,13 @@ export function checkEvolution( // Level-based evolution (default) if (data.evolves_at == null) return null; if (context.newLevel >= data.evolves_at && context.oldLevel < data.evolves_at) { + // Flag-based flow when state is provided + if (state) { + const pState = state.pokemon[pokemonName]; + if (pState) markEvolutionReady(pState, nextPokemon); + return null; + } + return { oldPokemon: pokemonName, newPokemon: nextPokemon, @@ -183,6 +218,72 @@ export function applyBranchEvolution( // Clear branching flags pState.evolution_ready = undefined; pState.evolution_options = undefined; + pState.evolution_prompt_shown = undefined; + + return result; +} + +/** + * Apply a user-selected single-chain evolution (string `evolves_to` or legacy line[stage+1]). + * Mirrors applyBranchEvolution for non-branching pokemon. 
+ */ +export function applySingleChainEvolution( + state: State, + config: Config, + pokemonName: string, + targetName: string, +): EvolutionResult | null { + const db = getPokemonDB(); + const data = db.pokemon[toBaseId(pokemonName)]; + if (!data) return null; + + // Must be single-chain (not branching) + if (Array.isArray(data.evolves_to)) return null; + + // Validate target: either string evolves_to (optionally cross-gen) or legacy line[stage+1] + let resolvedTarget: string | null = null; + let targetData: typeof db.pokemon[string] | undefined; + + if (typeof data.evolves_to === 'string') { + resolvedTarget = data.evolves_to; + targetData = db.pokemon[resolvedTarget]; + const crossRef = parseCrossGenRef(resolvedTarget); + if (crossRef) { + resolvedTarget = crossRef.id; + targetData = ensurePokemonInDB(resolvedTarget) ?? undefined; + } + } else { + // Legacy path: line[stage+1] + const nextStage = data.stage + 1; + if (nextStage < data.line.length) { + resolvedTarget = data.line[nextStage]; + targetData = db.pokemon[resolvedTarget]; + } + } + + if (!resolvedTarget || !targetData) return null; + if (resolvedTarget !== targetName) return null; + + // Block re-evolution if already unlocked (defense-in-depth) + const evolvedKey = isShinyKey(pokemonName) ? 
toShinyKey(targetName) : targetName; + if (state.unlocked.includes(evolvedKey)) return null; + + const pState = state.pokemon[pokemonName]; + if (!pState) return null; + + const result: EvolutionResult = { + oldPokemon: pokemonName, + newPokemon: targetName, + newId: targetData.id, + level: pState.level, + }; + + applyEvolution(state, config, result, pState.xp); + + // Clear flags + pState.evolution_ready = undefined; + pState.evolution_options = undefined; + pState.evolution_prompt_shown = undefined; return result; } diff --git a/src/core/notifications.ts b/src/core/notifications.ts index 59b22177..15773dec 100644 --- a/src/core/notifications.ts +++ b/src/core/notifications.ts @@ -16,6 +16,7 @@ export function checkPendingNotifications(state: State, config: Config, commonSt // 1. Evolution ready for (const [name, pState] of Object.entries(state.pokemon)) { if (!pState.evolution_ready) continue; + if (pState.evolution_prompt_shown) continue; // already prompted via stop block if (!config.party.includes(name)) continue; const id = `evolution_ready:${name}`; if (state.dismissed_notifications.includes(id)) continue; diff --git a/src/core/types.ts b/src/core/types.ts index 601acf24..88eafa82 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -92,6 +92,7 @@ export interface PokemonState { call_count?: number; evolution_ready?: boolean; evolution_options?: string[]; + evolution_prompt_shown?: boolean; moves?: number[]; met?: MetType; met_detail?: MetDetail; diff --git a/src/hooks/stop.ts b/src/hooks/stop.ts index c41086ff..c7ddb04d 100644 --- a/src/hooks/stop.ts +++ b/src/hooks/stop.ts @@ -5,7 +5,7 @@ import { readState, writeState, pruneSessionTokens, readSessionGenMap, writeSess import { readConfig, writeConfig, readGlobalConfig, writeGlobalConfig } from '../core/config.js'; import { getPokemonDB, getPokemonName, ensurePokemonInDB } from '../core/pokemon-data.js'; import { levelToXp, xpToLevel } from '../core/xp.js'; -import { checkEvolution, applyEvolution, 
addFriendship, FRIENDSHIP_PER_LEVELUP, FRIENDSHIP_PER_SESSION } from '../core/evolution.js'; +import { checkEvolution, addFriendship, FRIENDSHIP_PER_LEVELUP, FRIENDSHIP_PER_SESSION } from '../core/evolution.js'; import { checkAchievements, checkCommonAchievements, formatAchievementMessage } from '../core/achievements.js'; import { t, initLocale } from '../i18n/index.js'; import type { HookInput, HookOutput, ExpGroup } from '../core/types.js'; @@ -334,20 +334,10 @@ async function main(): Promise { unlockedAchievements: Object.keys(state.achievements).filter(k => state.achievements[k]), items: state.items ?? {}, }; - const evolution = checkEvolution(pokemonName, evoContext, state); - if (evolution) { - applyEvolution(state, config, evolution, newXp); - messages.push(t('hook.evolution', { pokemon: getPokemonName(pokemonName), newPokemon: getPokemonName(evolution.newPokemon) })); - playSfx('gacha'); - - // Check first_evolution achievement immediately - const achEvents = checkAchievements(state, config, commonState, gen); - for (const achEvent of achEvents) { - const msg = formatAchievementMessage(achEvent); - messages.push(msg); - achievementMessages.push(msg); - } - } + // Flag-based evolution: checkEvolution sets evolution_ready on the state + // for both branch and single-chain evolutions. Auto-evolve no longer happens + // here — block emission in post-lock scan triggers AskUserQuestion flow. + checkEvolution(pokemonName, evoContext, state); } // ── Codex flat XP (no volume tier / rest bonus, normal turn) ── @@ -599,6 +589,53 @@ async function main(): Promise { output.system_message = messages.join('\n'); } + // ── Evolution block detection (post-lock) ── + // Scan party for pokemon with evolution_ready && !evolution_prompt_shown. + // If found, emit decision:"block" with a reason instructing Claude to use + // AskUserQuestion. Flag is set AFTER block emission (Risk 6: duplication > loss). 
+ { + const postConfig = readConfig(gen); + const postState = readState(gen); + const candidates: Array<{ name: string; options: string[] }> = []; + for (const name of postConfig.party) { + const ps = postState.pokemon[name]; + if (ps?.evolution_ready && !ps.evolution_prompt_shown) { + candidates.push({ name, options: ps.evolution_options ?? [] }); + } + } + if (candidates.length > 0) { + const batch = candidates.slice(0, 4); + const candidateList = batch + .map(c => t('hook.evolution_candidate_line', { + pokemon: getPokemonName(c.name), + targets: c.options.map(o => getPokemonName(o)).join(', '), + })) + .join('\n'); + const reason = t('hook.evolution_block_reason', { candidateList }); + playCry(); + console.log(JSON.stringify({ decision: 'block', reason })); + + // Set evolution_prompt_shown AFTER block emission to avoid silent loss on crash. + // If this write fails, the block will re-emit on next Stop — duplicate prompt + // (UX degradation) is strictly preferable to silently losing the prompt. + try { + const lockResult = withLock(() => { + const s = readState(gen); + for (const c of batch) { + if (s.pokemon[c.name]) s.pokemon[c.name].evolution_prompt_shown = true; + } + writeState(s, gen); + }); + if (!lockResult.acquired) { + process.stderr.write('tokenmon stop: lock busy during evolution_prompt_shown write; will re-prompt next stop\n'); + } + } catch (err) { + process.stderr.write(`tokenmon stop: evolution_prompt_shown write failed: ${err}\n`); + } + return; + } + } + playCry(); console.log(JSON.stringify(output)); } diff --git a/src/i18n/en.json b/src/i18n/en.json index 57dc103d..2e275672 100644 --- a/src/i18n/en.json +++ b/src/i18n/en.json @@ -404,6 +404,8 @@ "hook.levelup": "⬆️ {pokemon} Lv.{from} → Lv.{to}! 
(XP: +{xp})", "hook.evolution": "✨ {pokemon} evolved into {newPokemon}!", "hook.party_join": "🎊 {pokemon} joined the party!", + "hook.evolution_candidate_line": "- {pokemon} can evolve to: {targets} (or refuse)", + "hook.evolution_block_reason": "The following party pokemon are ready to evolve. You MUST call AskUserQuestion to ask the user which evolution they want for each one (one subquestion per pokemon, up to 4 subquestions).\n\n{candidateList}\n\nFor each pokemon, present the evolution targets as options plus a \"Refuse\" option. If the user selects a target, run: `tokenmon evolve `. If the user refuses, do nothing — the pokemon will not be re-prompted automatically.", "tier.heated": "The tall grass is rustling intensely... (Next: encounter 1.5x, XP 1.5x)", "tier.intense": "Something seems to be lurking nearby... (Next: encounter 2.5x, XP 2.5x)", diff --git a/src/i18n/en.pokemon.json b/src/i18n/en.pokemon.json index 058d395d..97538283 100644 --- a/src/i18n/en.pokemon.json +++ b/src/i18n/en.pokemon.json @@ -371,6 +371,8 @@ "hook.levelup": "⬆️ {pokemon} Lv.{from} grew to Lv.{to}! (XP +{xp})", "hook.evolution": "✨ What? {pokemon} evolved into {newPokemon}!", "hook.party_join": "✨ {pokemon} joined the team!", + "hook.evolution_candidate_line": "- What? {pokemon} is ready to evolve into: {targets} (or refuse)", + "hook.evolution_block_reason": "What?! The following party pokemon are ready to evolve! You MUST call AskUserQuestion to ask the Trainer which evolution they want for each one (one subquestion per pokemon, up to 4 subquestions).\n\n{candidateList}\n\nFor each pokemon, present the evolution targets as options plus a \"Refuse\" option. If the Trainer selects a target, run: `tokenmon evolve `. If the Trainer refuses, do nothing — that pokemon will not be re-prompted automatically.", "tier.heated": "The tall grass is rustling intensely... (Next: encounter 1.5x, XP 1.5x)", "tier.intense": "Something seems to be lurking nearby... 
(Next: encounter 2.5x, XP 2.5x)", diff --git a/src/i18n/ko.json b/src/i18n/ko.json index 8040c17b..3b7ca9ce 100644 --- a/src/i18n/ko.json +++ b/src/i18n/ko.json @@ -404,6 +404,8 @@ "hook.levelup": "⬆️ {pokemon} Lv.{from} → Lv.{to}! (XP: +{xp})", "hook.evolution": "✨ {pokemon:이/가} {newPokemon}(으)로 진화했습니다!", "hook.party_join": "🎊 {pokemon:이/가} 파티에 합류했습니다!", + "hook.evolution_candidate_line": "- {pokemon} 진화 가능: {targets} (또는 거부)", + "hook.evolution_block_reason": "다음 파티 포켓몬이 진화할 준비가 되었습니다. 반드시 AskUserQuestion을 호출하여 각 포켓몬에 대해 사용자에게 진화를 물어보세요 (포켓몬당 하나의 subquestion, 최대 4개).\n\n{candidateList}\n\n각 포켓몬마다 진화 대상 옵션들과 \"거부\" 옵션을 함께 제시하세요. 사용자가 대상을 선택하면 `tokenmon evolve `을 실행하세요. 사용자가 거부하면 아무것도 하지 마세요 — 해당 포켓몬은 자동으로 다시 묻지 않습니다.", "tier.heated": "풀숲이 크게 흔들리고 있다... (다음 턴 조우율 1.5x, XP 1.5x)", "tier.intense": "주변에 수상한 기운이 감돌고 있다... (다음 턴 조우율 2.5x, XP 2.5x)", diff --git a/src/i18n/ko.pokemon.json b/src/i18n/ko.pokemon.json index aa3f47a9..dcc6f78f 100644 --- a/src/i18n/ko.pokemon.json +++ b/src/i18n/ko.pokemon.json @@ -371,6 +371,8 @@ "hook.levelup": "⬆️ {pokemon}은(는) Lv.{from}에서 레벨 {to}이(가) 되었다! (XP +{xp})", "hook.evolution": "✨ ...어라!? {pokemon:이/가} {newPokemon}(으)로 진화했다!", "hook.party_join": "✨ {pokemon:이/가} 동료가 되었다!", + "hook.evolution_candidate_line": "- 어라!? {pokemon:이/가} 진화할 준비가 되었다: {targets} (또는 거부)", + "hook.evolution_block_reason": "...어라!? 다음 파티 포켓몬이 진화할 준비가 되었다! 반드시 AskUserQuestion을 호출해서 트레이너에게 각 포켓몬의 진화를 물어봐야 한다 (포켓몬당 하나의 subquestion, 최대 4개).\n\n{candidateList}\n\n각 포켓몬마다 진화 대상 옵션들과 \"거부\" 옵션을 함께 제시해야 한다. 트레이너가 대상을 선택하면 `tokenmon evolve `을 실행한다. 트레이너가 거부하면 아무것도 하지 않는다 — 그 포켓몬은 자동으로 다시 묻지 않는다.", "tier.heated": "풀숲이 크게 흔들리고 있다... (다음 턴 조우율 1.5x, XP 1.5x)", "tier.intense": "주변에 수상한 기운이 감돌고 있다... 
(다음 턴 조우율 2.5x, XP 2.5x)", diff --git a/src/status-line.ts b/src/status-line.ts index 97916ccf..f3e0235f 100644 --- a/src/status-line.ts +++ b/src/status-line.ts @@ -600,7 +600,7 @@ function main(): void { // Show evolution_ready hint for party pokemon with pending branching evolution for (const pokemonName of config.party) { const pState = state.pokemon[pokemonName]; - if (pState?.evolution_ready) { + if (pState?.evolution_ready && !pState?.evolution_prompt_shown) { print(t('statusline.evolution_ready', { pokemon: getPokemonName(pokemonName) })); break; } diff --git a/test/e2e/evolve-askuserquestion.test.ts b/test/e2e/evolve-askuserquestion.test.ts new file mode 100644 index 00000000..1569825a --- /dev/null +++ b/test/e2e/evolve-askuserquestion.test.ts @@ -0,0 +1,201 @@ +/** + * E2E test: evolution AskUserQuestion via stop hook block emission. + * + * Verifies that when party pokemon have `evolution_ready && !evolution_prompt_shown`, + * the stop hook emits `{decision:"block", reason}` containing the AskUserQuestion + * instruction, and then sets `evolution_prompt_shown=true` on the scanned candidates. + * + * NOTE: Per plan Step 8, the canonical harness is tmux-based (spec AC9 calls for + * full Claude Code session launch). That infrastructure is heavier than the current + * time budget for this PR, so this test uses the `child_process` fallback path the + * plan explicitly permits (Risk 4 mitigation). It isolates the tokenmon data dir + * via `CLAUDE_CONFIG_DIR`, pipes a fake stdin JSON into stop.ts, captures stdout, + * and asserts on the block output. The tmux variant is TODO: see AC9 — the rationale + * is that the actual block JSON contract is fully tested here; tmux only adds coverage + * for the real-session harness integration which is a separate concern. 
+ */ + +import { describe, it, before, after } from 'node:test'; +import assert from 'node:assert/strict'; +import { execFileSync, spawnSync } from 'node:child_process'; +import { mkdtempSync, rmSync, writeFileSync, readFileSync, existsSync, mkdirSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { makeState, makeConfig } from '../helpers.js'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const REPO_ROOT = join(__dirname, '..', '..'); +const STOP_HOOK_PATH = join(REPO_ROOT, 'src', 'hooks', 'stop.ts'); + +interface RunOutput { + stdout: string; + stderr: string; + status: number | null; +} + +function runStopHook(dataDir: string, stdinJson: string): RunOutput { + // Write minimal gen-map so session is recognized without session-start hook + const genMapPath = join(dataDir, 'tokenmon', 'session-gen-map.json'); + if (!existsSync(dirname(genMapPath))) mkdirSync(dirname(genMapPath), { recursive: true }); + + const result = spawnSync( + process.execPath, + ['--import', 'tsx', STOP_HOOK_PATH], + { + input: stdinJson, + env: { + ...process.env, + CLAUDE_CONFIG_DIR: dataDir, + CLAUDE_PLUGIN_ROOT: REPO_ROOT, + }, + encoding: 'utf-8', + cwd: REPO_ROOT, + timeout: 15000, + }, + ); + return { + stdout: result.stdout ?? '', + stderr: result.stderr ?? 
'', + status: result.status, + }; +} + +function seedState(dataDir: string, gen: string, stateOverrides: any, configOverrides: any): void { + const genDir = join(dataDir, 'tokenmon', gen); + mkdirSync(genDir, { recursive: true }); + const state = makeState(stateOverrides); + const config = makeConfig(configOverrides); + writeFileSync(join(genDir, 'state.json'), JSON.stringify(state, null, 2)); + writeFileSync(join(genDir, 'config.json'), JSON.stringify(config, null, 2)); + // global config for active generation + const globalConfig = { + active_generation: gen, + language: 'en', + voice_tone: 'claude', + weather_enabled: false, + weather_location: '', + }; + mkdirSync(join(dataDir, 'tokenmon'), { recursive: true }); + writeFileSync(join(dataDir, 'tokenmon', 'global-config.json'), JSON.stringify(globalConfig, null, 2)); + // common state + writeFileSync(join(dataDir, 'tokenmon', 'common_state.json'), JSON.stringify({ + achievements: {}, + encounter_rate_bonus: 0, + xp_bonus_multiplier: 1.0, + items: {}, + max_party_size_bonus: 0, + session_count: 0, + total_tokens_consumed: 0, + battle_count: 0, + battle_wins: 0, + catch_count: 0, + evolution_count: 0, + error_count: 0, + permission_count: 0, + total_gym_badges: 0, + completed_gym_gens: 0, + titles: [], + rare_weight_multiplier: 1.0, + last_codex_tokens_total: 0, + last_turn_ts: Date.now(), + }, null, 2)); +} + +describe('evolve AskUserQuestion via stop hook', () => { + let tmpDir: string; + + before(() => { + tmpDir = mkdtempSync(join(tmpdir(), 'tkm-evolve-e2e-')); + }); + + after(() => { + if (tmpDir) rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('block JSON is emitted with AskUserQuestion instruction when candidate is evolution_ready', () => { + const gen = 'gen4'; + const sessionId = 'test-session-evolve-1'; + // Turtwig (387) with evolution_ready already set, not yet prompted + seedState(tmpDir, gen, { + pokemon: { + '387': { + id: 387, xp: 5000, level: 18, friendship: 0, ev: 0, + 
evolution_ready: true, evolution_options: ['388'], + }, + }, + unlocked: ['387'], + last_session_tokens: { [sessionId]: 1000 }, // avoid first_stop + }, { + party: ['387'], + language: 'en', + }); + + const stdinJson = JSON.stringify({ session_id: sessionId }); + const out = runStopHook(tmpDir, stdinJson); + + assert.equal(out.status, 0, `stop hook should exit 0; stderr: ${out.stderr}`); + // Find the last JSON line in stdout (in case cry or other stdout appears) + const lines = out.stdout.trim().split('\n').filter(l => l.trim().startsWith('{')); + assert.ok(lines.length > 0, `expected JSON output, got: ${out.stdout}`); + const lastLine = lines[lines.length - 1]; + let parsed: any; + try { + parsed = JSON.parse(lastLine); + } catch (e) { + assert.fail(`could not parse JSON line "${lastLine}": ${e}`); + } + + // AC1 / AC2: decision:"block" with reason containing AskUserQuestion instruction + assert.equal(parsed.decision, 'block', `expected decision:"block", got: ${JSON.stringify(parsed)}`); + assert.ok(typeof parsed.reason === 'string', 'reason should be a string'); + assert.match(parsed.reason, /AskUserQuestion/i, 'reason should instruct to call AskUserQuestion'); + assert.match(parsed.reason, /tokenmon evolve/i, 'reason should include the tokenmon evolve command'); + + // Verify flag was set after block emission + const stateAfter = JSON.parse( + readFileSync(join(tmpDir, 'tokenmon', gen, 'state.json'), 'utf-8'), + ); + assert.equal( + stateAfter.pokemon['387'].evolution_prompt_shown, true, + 'evolution_prompt_shown should be set after block emission', + ); + }); + + it('no block when evolution_prompt_shown is already true', () => { + // Fresh tmp dir for isolation + const isolatedDir = mkdtempSync(join(tmpdir(), 'tkm-evolve-e2e-skip-')); + try { + const gen = 'gen4'; + const sessionId = 'test-session-evolve-2'; + seedState(isolatedDir, gen, { + pokemon: { + '387': { + id: 387, xp: 5000, level: 18, friendship: 0, ev: 0, + evolution_ready: true, evolution_options: 
['388'], + evolution_prompt_shown: true, // already prompted + }, + }, + unlocked: ['387'], + last_session_tokens: { [sessionId]: 1000 }, + }, { + party: ['387'], + language: 'en', + }); + + const stdinJson = JSON.stringify({ session_id: sessionId }); + const out = runStopHook(isolatedDir, stdinJson); + + assert.equal(out.status, 0); + const lines = out.stdout.trim().split('\n').filter(l => l.trim().startsWith('{')); + const lastLine = lines[lines.length - 1]; + const parsed = JSON.parse(lastLine); + + // Should be a normal continue, not a block + assert.notEqual(parsed.decision, 'block', 'should not block when prompt_shown is true'); + assert.equal(parsed.continue, true, 'should continue normally'); + } finally { + rmSync(isolatedDir, { recursive: true, force: true }); + } + }); +}); From fb88020abb5e2eaf1c013ea9c9537205d98cf6f2 Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 14:07:30 +0900 Subject: [PATCH 02/14] fix(stop): preserve system_message on evolution block emission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The evolution block path in stop.ts previously emitted {decision:"block", reason} but discarded any system_message that was populated in the same stop turn — level-up, catch, and achievement notifications would silently disappear the moment an evolution triggered. Merge the accumulated system_message into the block output so user-facing messages persist alongside Claude's block instruction. systemMessage is user-facing only per the Claude Code hooks spec, so it does not interfere with the reason field Claude consumes. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/hooks/stop.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/hooks/stop.ts b/src/hooks/stop.ts index c7ddb04d..d6224171 100644 --- a/src/hooks/stop.ts +++ b/src/hooks/stop.ts @@ -613,7 +613,17 @@ async function main(): Promise { .join('\n'); const reason = t('hook.evolution_block_reason', { candidateList }); playCry(); - console.log(JSON.stringify({ decision: 'block', reason })); + // Preserve level-up/achievement messages from the parent lock; systemMessage is + // user-facing only, so merging it here does not interfere with the block reason + // that Claude consumes. + const blockOutput: { decision: 'block'; reason: string; system_message?: string } = { + decision: 'block', + reason, + }; + if (messages.length > 0) { + blockOutput.system_message = messages.join('\n'); + } + console.log(JSON.stringify(blockOutput)); // Set evolution_prompt_shown AFTER block emission to avoid silent loss on crash. // If this write fails, the block will re-emit on next Stop — duplicate prompt From 7f0d832e6a25e14eb97c68724bf224dfa402192e Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 14:09:52 +0900 Subject: [PATCH 03/14] feat(dev): add /tkm:test-evolve harness for Stop-hook evolution flow Dev-only slash command that auto-cycles the full E2E path for the Stop-hook evolution AskUserQuestion feature. Per scenario: backup state and hooks, seed test party, swap hooks.json to worktree paths, spawn a fresh tmux pane with isolated CLAUDE_CONFIG_DIR, launch Claude Code, capture the AskUserQuestion render, send-keys the scenario's expected answer, 3-layer verify (UI regex, tokenmon evolve tool call, state diff), restore backup. No-arg runs all 6 scenarios sequentially; --scenario runs one; --restore cleans up after an aborted run; --dry-run lists scenarios without LLM cost. Harness is excluded from the published plugin via the new files allowlist in package.json. 
- 6 scenarios covering branch, single-chain, batch, overflow, refuse persistence, and accept-clear-reprompt lifecycle - backup.ts: dual-format hooks.json swap (baked absolute paths OR CLAUDE_PLUGIN_ROOT/DATA template vars), byte-level restore, gen-aware state/config paths; resolves hooks path via PLUGIN_ROOT walk-up - tmux-driver.ts: pane spawn with isolated CLAUDE_CONFIG_DIR, capture-with-pattern-wait, numeric + text send-keys, graceful tmux-missing fallback - verify.ts: 3-layer assertion from UI regex through tool call detection to state diff against expected_after - cli/test-evolve.ts: SIGINT handler + try/finally for crash-safe restore (duplicate prompt preferred over silent loss) - skills/test-evolve/SKILL.md: slash command entry delegating to the tsx CLI - Tighten existing e2e test types (any to Partial, Partial) and drop the unused execFileSync import Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 1 + package.json | 38 ++ skills/test-evolve/SKILL.md | 28 ++ src/cli/test-evolve.ts | 366 ++++++++++++++++++ src/test-evolve/backup.ts | 209 ++++++++++ src/test-evolve/tmux-driver.ts | 191 +++++++++ src/test-evolve/verify.ts | 244 ++++++++++++ src/test-scenarios/accept-clear-reprompt.json | 31 ++ src/test-scenarios/branch-eevee.json | 31 ++ src/test-scenarios/multi-3.json | 49 +++ src/test-scenarios/overflow-5.json | 67 ++++ src/test-scenarios/refuse-persist.json | 30 ++ src/test-scenarios/single-charmander.json | 31 ++ test/e2e/evolve-askuserquestion.test.ts | 5 +- 14 files changed, 1319 insertions(+), 2 deletions(-) create mode 100644 skills/test-evolve/SKILL.md create mode 100644 src/cli/test-evolve.ts create mode 100644 src/test-evolve/backup.ts create mode 100644 src/test-evolve/tmux-driver.ts create mode 100644 src/test-evolve/verify.ts create mode 100644 src/test-scenarios/accept-clear-reprompt.json create mode 100644 src/test-scenarios/branch-eevee.json create mode 100644 src/test-scenarios/multi-3.json create mode 100644 
src/test-scenarios/overflow-5.json create mode 100644 src/test-scenarios/refuse-persist.json create mode 100644 src/test-scenarios/single-charmander.json diff --git a/.gitignore b/.gitignore index dd0c3e12..1a524a36 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ config.json .claude/ .worktrees/ TODO.txt +.tokenmon/test-backup/ diff --git a/package.json b/package.json index 428c566b..2a1cf271 100644 --- a/package.json +++ b/package.json @@ -26,6 +26,44 @@ "typescript": "^5.5.0" }, "license": "MIT", + "files": [ + "bin/", + ".claude-plugin/", + "src/cli/battle-turn.ts", + "src/cli/friendly-battle.ts", + "src/cli/friendly-battle-local.ts", + "src/cli/friendly-battle-spike.ts", + "src/cli/friendly-battle-turn.ts", + "src/cli/gym-list.ts", + "src/cli/moves.ts", + "src/cli/tokenmon.ts", + "src/core/", + "src/hooks/", + "src/setup/", + "src/friendly-battle/", + "src/i18n/", + "src/status-line.ts", + "skills/call/", + "skills/doctor/", + "skills/friendly-battle/", + "skills/gym/", + "skills/language/", + "skills/moves/", + "skills/name/", + "skills/relay-setup/", + "skills/reset/", + "skills/setup/", + "skills/tkm/", + "skills/uninstall/", + "data/", + "hooks/", + "cries/", + "sprites/", + "sfx/", + "README.md", + "README.ko.md", + "LICENSE" + ], "repository": { "type": "git", "url": "https://github.com/ThunderConch/tkm.git" diff --git a/skills/test-evolve/SKILL.md b/skills/test-evolve/SKILL.md new file mode 100644 index 00000000..22e93d0c --- /dev/null +++ b/skills/test-evolve/SKILL.md @@ -0,0 +1,28 @@ +--- +description: "Dev-only: E2E test harness for the evolution AskUserQuestion flow via tmux. Runs 6 scenarios in isolated Claude Code sessions and reports 3-layer verify results." +--- + +Run the dev-only `test-evolve` E2E harness. This is NOT shipped in the released plugin — it exercises the Stop-hook evolution block path end-to-end by spawning real Claude Code sessions inside tmux panes. 
+ +```bash +P="${CLAUDE_PLUGIN_ROOT:-$(ls -d ~/.claude/plugins/marketplaces/tkm 2>/dev/null || ls -d ~/.claude/plugins/cache/tkm/tkm/*/ 2>/dev/null | sort -V | tail -1)}" +"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" ${ARGUMENTS} +``` + +## Usage + +| Command | Description | +|---------|-------------| +| `/tkm:test-evolve` | Run all 6 scenarios sequentially (tmux + real LLM cost) | +| `/tkm:test-evolve --scenario branch-eevee` | Run a single scenario by name | +| `/tkm:test-evolve --dry-run` | Validate scenarios + tmux, no LLM cost | +| `/tkm:test-evolve --restore` | Restore from latest backup and exit | + +## What it does + +1. Backs up the user's live `state.json`, `config.json`, and installed `hooks/hooks.json` to `.tokenmon/test-backup/<timestamp>/`. +2. Rewrites `hooks.json` so hooks point at the worktree under test (dual-format: baked absolute OR `${CLAUDE_PLUGIN_ROOT}` template). +3. For each scenario: spawns a tmux pane with an isolated `CLAUDE_CONFIG_DIR`, seeds party state, launches `claude`, detects the AskUserQuestion UI, injects the expected choice via `tmux send-keys`, and runs 3-layer verification (UI regex + tool-call match + state diff). +4. On completion (or crash, or Ctrl+C) restores the backup byte-for-byte. + +Show the output table to the user. Any `FAIL` rows include the failing layer and diff detail. diff --git a/src/cli/test-evolve.ts b/src/cli/test-evolve.ts new file mode 100644 index 00000000..562b55a6 --- /dev/null +++ b/src/cli/test-evolve.ts @@ -0,0 +1,366 @@ +#!/usr/bin/env -S npx tsx +/** + * test-evolve.ts — Dev-only E2E test harness orchestrator for the evolution + * AskUserQuestion flow. + * + * Subcommands: + * (default) run all 6 scenarios sequentially + * --scenario <name> run a single scenario by name + * --restore restore from the latest backup and exit + * --dry-run validate scenarios + check tmux, no LLM cost + * --help print usage + * + * Global try/finally ensures state is restored even on crash or SIGINT.
+ */ +import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'fs'; +import { fileURLToPath } from 'url'; +import { dirname, join, resolve } from 'path'; +import { getActiveGeneration } from '../core/paths.js'; +import { + createBackup, + getLatestBackup, + restoreBackup, + restoreHooksJson, + swapHooksJson, + type BackupManifest, +} from '../test-evolve/backup.js'; +import { + ASK_USER_QUESTION_UI_REGEX, + capturePane, + checkTmux, + killSession, + makeScenarioConfigDir, + sendKeys, + spawnPane, + waitForPattern, +} from '../test-evolve/tmux-driver.js'; +import { verifyScenario, type Scenario, type VerifyResult } from '../test-evolve/verify.js'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); +const REPO_ROOT = resolve(__dirname, '..', '..'); +const SCENARIOS_DIR = join(REPO_ROOT, 'src', 'test-scenarios'); + +// ── CLI parsing ── + +interface CliArgs { + help: boolean; + dryRun: boolean; + restore: boolean; + scenario: string | null; +} + +function parseArgs(argv: string[]): CliArgs { + const args: CliArgs = { help: false, dryRun: false, restore: false, scenario: null }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + switch (a) { + case '--help': + case '-h': + args.help = true; + break; + case '--dry-run': + args.dryRun = true; + break; + case '--restore': + args.restore = true; + break; + case '--scenario': + args.scenario = argv[++i] ?? 
null; + break; + default: + if (a.startsWith('--')) { + process.stderr.write(`test-evolve: unknown flag ${a}\n`); + } + } + } + return args; +} + +function printHelp(): void { + process.stdout.write( + [ + 'test-evolve — dev-only E2E harness for the evolution AskUserQuestion flow', + '', + 'Usage:', + ' test-evolve Run all 6 scenarios sequentially', + ' test-evolve --scenario Run a single scenario by name', + ' test-evolve --restore Restore from latest backup and exit', + ' test-evolve --dry-run Validate scenarios + tmux, no LLM cost', + ' test-evolve --help Show this help', + '', + 'Scenarios:', + ' branch-eevee, single-charmander, multi-3, overflow-5,', + ' refuse-persist, accept-clear-reprompt', + '', + ].join('\n'), + ); +} + +// ── Scenario loading ── + +function loadScenarios(): Scenario[] { + if (!existsSync(SCENARIOS_DIR)) { + throw new Error(`test-evolve: scenarios dir missing: ${SCENARIOS_DIR}`); + } + const files = readdirSync(SCENARIOS_DIR).filter((f) => f.endsWith('.json')); + return files.map((f) => JSON.parse(readFileSync(join(SCENARIOS_DIR, f), 'utf-8')) as Scenario); +} + +function loadScenarioByName(name: string): Scenario { + const path = join(SCENARIOS_DIR, `${name}.json`); + if (!existsSync(path)) { + throw new Error(`test-evolve: scenario not found: ${name} (expected at ${path})`); + } + return JSON.parse(readFileSync(path, 'utf-8')) as Scenario; +} + +// ── Seed writer ── + +interface SeedState { + pokemon: Record; + unlocked: string[]; + [k: string]: any; +} + +interface SeedConfig { + party: string[]; + [k: string]: any; +} + +function writeSeed(configDir: string, gen: string, scenario: Scenario): void { + const tokenmonDir = join(configDir, 'tokenmon', gen); + mkdirSync(tokenmonDir, { recursive: true }); + + const state: SeedState = { + pokemon: scenario.seed.pokemon, + unlocked: scenario.seed.unlocked, + }; + const config: SeedConfig = { party: scenario.seed.party }; + + writeFileSync(join(tokenmonDir, 'state.json'), JSON.stringify(state, 
null, 2), 'utf-8'); + writeFileSync(join(tokenmonDir, 'config.json'), JSON.stringify(config, null, 2), 'utf-8'); +} + +// ── Scenario runner ── + +interface ScenarioResult { + name: string; + verify: VerifyResult | null; + error: string | null; + durationMs: number; + cost_estimate_usd: number; +} + +async function runScenario(scenario: Scenario, gen: string): Promise { + const start = Date.now(); + const sessionName = `tkm-test-${scenario.name}`; + const configDir = makeScenarioConfigDir(scenario.name); + let paneId: string | null = null; + + try { + writeSeed(configDir, gen, scenario); + + // Spawn a claude pane. Minimal prompt asks LLM to just say "ok" to + // trigger the Stop hook which will emit the block. + const claudePrompt = `just say ok. if you get an AskUserQuestion about pokemon evolution, pick option ${scenario.expected_choice}.`; + const handle = spawnPane({ + sessionName, + envVars: { + CLAUDE_CONFIG_DIR: configDir, + TOKENMON_HOOK_MODE: '1', + }, + cwd: REPO_ROOT, + command: `claude -p ${JSON.stringify(claudePrompt)}`, + }); + paneId = handle.paneId; + + // Wait for AskUserQuestion UI to render (numbered option prefixes) + const uiMatch = await waitForPattern(paneId, ASK_USER_QUESTION_UI_REGEX, 120_000); + if (!uiMatch) { + return { + name: scenario.name, + verify: null, + error: 'timeout waiting for AskUserQuestion UI', + durationMs: Date.now() - start, + cost_estimate_usd: 0.1, + }; + } + + // Inject choice + sendKeys(paneId, scenario.expected_choice); + + // Wait for evolution completion — look for `tokenmon evolve` call + // signature or scenario completion markers. 
+ await waitForPattern(paneId, /tokenmon\s+evolve\s+\d+/, 60_000); + + const captured = capturePane(paneId, { history: true }); + const verify = verifyScenario(captured, scenario, gen, configDir); + + return { + name: scenario.name, + verify, + error: null, + durationMs: Date.now() - start, + cost_estimate_usd: 0.2, + }; + } catch (err: any) { + return { + name: scenario.name, + verify: null, + error: err?.message ?? String(err), + durationMs: Date.now() - start, + cost_estimate_usd: 0.05, + }; + } finally { + killSession(sessionName); + } +} + +// ── Report ── + +function printReport(results: ScenarioResult[]): void { + const lines: string[] = []; + lines.push(''); + lines.push('┌──────────────────────────┬────────┬────────┬─────────┬────────┬──────────┐'); + lines.push('│ scenario │ result │ UI │ Tool │ State │ cost$ │'); + lines.push('├──────────────────────────┼────────┼────────┼─────────┼────────┼──────────┤'); + let totalCost = 0; + for (const r of results) { + const name = r.name.padEnd(24).slice(0, 24); + const overall = r.verify?.pass ? 'PASS ' : 'FAIL '; + const ui = r.verify?.layer_ui.pass ? 'ok ' : 'x '; + const tool = r.verify?.layer_tool.pass ? 'ok ' : 'x '; + const state = r.verify?.layer_state.pass ? 
'ok ' : 'x '; + const cost = `$${r.cost_estimate_usd.toFixed(2)}`.padEnd(8); + totalCost += r.cost_estimate_usd; + lines.push(`│ ${name} │ ${overall} │ ${ui} │ ${tool} │ ${state} │ ${cost} │`); + if (r.error) { + lines.push(`│ error: ${r.error.slice(0, 62).padEnd(62)} │`); + } + if (r.verify && !r.verify.pass) { + if (!r.verify.layer_ui.pass) lines.push(`│ UI: ${r.verify.layer_ui.detail.slice(0, 66).padEnd(66)} │`); + if (!r.verify.layer_tool.pass) lines.push(`│ Tool: ${r.verify.layer_tool.detail.slice(0, 64).padEnd(64)} │`); + if (!r.verify.layer_state.pass) { + for (const d of r.verify.layer_state.diffs.slice(0, 4)) { + const detail = `${d.field}: expected=${JSON.stringify(d.expected)} actual=${JSON.stringify(d.actual)}`; + lines.push(`│ State: ${detail.slice(0, 63).padEnd(63)} │`); + } + } + } + } + lines.push('└──────────────────────────┴────────┴────────┴─────────┴────────┴──────────┘'); + const passed = results.filter((r) => r.verify?.pass).length; + lines.push(`Total: ${passed}/${results.length} passed, estimated cost $${totalCost.toFixed(2)}`); + lines.push(''); + process.stdout.write(lines.join('\n')); +} + +// ── Subcommand implementations ── + +async function runAll(scenarios: Scenario[], gen: string, backup: BackupManifest): Promise { + process.stdout.write(`test-evolve: backup @ ${backup.dir}\n`); + process.stdout.write(`test-evolve: swapping hooks.json -> ${REPO_ROOT}\n`); + const swap = swapHooksJson(REPO_ROOT); + process.stdout.write(`test-evolve: swap mode=${swap.mode} path=${swap.hooksPath}\n`); + + const results: ScenarioResult[] = []; + for (const s of scenarios) { + process.stdout.write(`\n── running ${s.name} ──\n`); + const r = await runScenario(s, gen); + results.push(r); + process.stdout.write(` done in ${r.durationMs}ms — ${r.verify?.pass ? 'PASS' : 'FAIL'}${r.error ? 
` (${r.error})` : ''}\n`); + } + printReport(results); +} + +async function dryRun(scenarios: Scenario[], gen: string): Promise { + checkTmux(); + process.stdout.write(`test-evolve dry-run (gen=${gen})\n`); + process.stdout.write(`tmux: available\n`); + process.stdout.write(`scenarios loaded: ${scenarios.length}\n`); + for (const s of scenarios) { + const readyCount = Object.values(s.seed.pokemon).filter((p: any) => p?.evolution_ready).length; + process.stdout.write( + ` - ${s.name.padEnd(24)} party=${s.seed.party.length} ready=${readyCount} choice=${s.expected_choice}\n`, + ); + } + process.stdout.write(`\nNo LLM cost incurred. Run without --dry-run to execute.\n`); +} + +function doRestore(gen: string): void { + const latest = getLatestBackup(); + if (!latest) { + process.stderr.write('test-evolve --restore: no backup found under .tokenmon/test-backup/\n'); + process.exit(1); + } + process.stdout.write(`test-evolve: restoring from ${latest}\n`); + restoreBackup(latest, gen); + process.stdout.write(`test-evolve: restore complete\n`); +} + +// ── Main entry ── + +async function main(): Promise { + const args = parseArgs(process.argv.slice(2)); + if (args.help) { + printHelp(); + return; + } + + const gen = getActiveGeneration(); + + if (args.restore) { + doRestore(gen); + return; + } + + // Load scenarios early so --dry-run can validate them. + let scenarios: Scenario[]; + try { + scenarios = args.scenario ? [loadScenarioByName(args.scenario)] : loadScenarios(); + } catch (err: any) { + process.stderr.write(`${err?.message ?? 
err}\n`); + process.exit(1); + } + + if (args.dryRun) { + await dryRun(scenarios, gen); + return; + } + + checkTmux(); + const backup = createBackup(gen); + + // SIGINT handler — ensure restore even on Ctrl+C + const sigintHandler = () => { + process.stderr.write('\ntest-evolve: SIGINT — restoring backup before exit\n'); + try { + restoreBackup(backup.dir, gen); + restoreHooksJson(backup.dir); + } catch (err) { + process.stderr.write(`test-evolve: restore on SIGINT failed: ${err}\n`); + } + process.exit(130); + }; + process.on('SIGINT', sigintHandler); + + try { + await runAll(scenarios, gen, backup); + } finally { + process.off('SIGINT', sigintHandler); + try { + restoreBackup(backup.dir, gen); + restoreHooksJson(backup.dir); + process.stdout.write(`test-evolve: state restored from ${backup.dir}\n`); + } catch (err) { + process.stderr.write(`test-evolve: restore failed: ${err}\n`); + process.stderr.write(`test-evolve: manual recovery: test-evolve --restore\n`); + } + } +} + +main().catch((err) => { + process.stderr.write(`test-evolve: fatal: ${err?.stack ?? err}\n`); + process.exit(1); +}); diff --git a/src/test-evolve/backup.ts b/src/test-evolve/backup.ts new file mode 100644 index 00000000..7b7ea76a --- /dev/null +++ b/src/test-evolve/backup.ts @@ -0,0 +1,209 @@ +/** + * backup.ts — Dev-only backup/restore utility for the test-evolve harness. + * + * Backs up the user's live `state.json`, `config.json`, and the installed + * plugin's `hooks/hooks.json` to `.tokenmon/test-backup/<timestamp>/`. + * Restores byte-perfect copies on completion or via `--restore`. + * + * `swapHooksJson()` supports BOTH baked-absolute paths (post-install form) AND + * `${CLAUDE_PLUGIN_ROOT}` / `${CLAUDE_PLUGIN_DATA}` template form for parity + * with `src/setup/postinstall.ts:bakeHookPaths()`.
+ */ +import { copyFileSync, existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from 'fs'; +import { dirname, join } from 'path'; +import { homedir } from 'os'; +import { DATA_DIR, PLUGIN_ROOT, configPath, statePath } from '../core/paths.js'; + +export interface BackupManifest { + timestamp: string; + dir: string; + generation: string; + hooksSource: string; + files: { + state: string; + config: string; + hooks: string; + }; +} + +/** + * Resolve the user's active hooks.json path. Prefers `CLAUDE_PLUGIN_ROOT` + * (via `core/paths.ts:PLUGIN_ROOT` which walks up to find `package.json`), + * then checks the canonical plugin marketplace install location under + * `~/.claude/plugins/marketplaces/tkm/`. Throws if none exist so the dev + * harness fails loudly instead of writing to a non-existent path. + */ +export function getInstalledHooksPath(): string { + const pluginRootHooks = join(PLUGIN_ROOT, 'hooks', 'hooks.json'); + if (existsSync(pluginRootHooks)) return pluginRootHooks; + + const marketplaceHooks = join(homedir(), '.claude', 'plugins', 'marketplaces', 'tkm', 'hooks', 'hooks.json'); + if (existsSync(marketplaceHooks)) return marketplaceHooks; + + throw new Error( + `Cannot locate active hooks.json. Checked:\n - ${pluginRootHooks}\n - ${marketplaceHooks}\n` + + `Set CLAUDE_PLUGIN_ROOT to your tkm install location and retry.`, + ); +} + +/** Byte-copy helper with ancestor mkdir. */ +function byteCopy(src: string, dst: string): void { + mkdirSync(dirname(dst), { recursive: true }); + copyFileSync(src, dst); +} + +/** + * Create a timestamped backup of state, config, and hooks.json. + * @param gen Active generation from `getActiveGeneration()` at call time. 
+ */ +export function createBackup(gen: string): BackupManifest { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + const dir = join(DATA_DIR, 'test-backup', timestamp); + mkdirSync(dir, { recursive: true }); + + const statefile = statePath(gen); + const configfile = configPath(gen); + const hooksfile = getInstalledHooksPath(); + + const backupState = join(dir, 'state.json'); + const backupConfig = join(dir, 'config.json'); + const backupHooks = join(dir, 'hooks.json'); + + if (existsSync(statefile)) byteCopy(statefile, backupState); + if (existsSync(configfile)) byteCopy(configfile, backupConfig); + if (existsSync(hooksfile)) byteCopy(hooksfile, backupHooks); + + const manifest: BackupManifest = { + timestamp, + dir, + generation: gen, + hooksSource: hooksfile, + files: { state: backupState, config: backupConfig, hooks: backupHooks }, + }; + writeFileSync(join(dir, 'manifest.json'), JSON.stringify(manifest, null, 2), 'utf-8'); + return manifest; +} + +/** + * Restore all 3 files from a backup directory. Byte-identical restore. 
+ */ +export function restoreBackup(backupDir: string, gen: string): void { + const manifestPath = join(backupDir, 'manifest.json'); + let hooksTarget = getInstalledHooksPath(); + if (existsSync(manifestPath)) { + try { + const m = JSON.parse(readFileSync(manifestPath, 'utf-8')) as BackupManifest; + if (m.hooksSource) hooksTarget = m.hooksSource; + } catch { + /* fall through */ + } + } + + const backupState = join(backupDir, 'state.json'); + const backupConfig = join(backupDir, 'config.json'); + const backupHooks = join(backupDir, 'hooks.json'); + + if (existsSync(backupState)) { + try { + byteCopy(backupState, statePath(gen)); + } catch (err) { + process.stderr.write(`test-evolve restore state: ${err}\n`); + } + } + if (existsSync(backupConfig)) { + try { + byteCopy(backupConfig, configPath(gen)); + } catch (err) { + process.stderr.write(`test-evolve restore config: ${err}\n`); + } + } + if (existsSync(backupHooks)) { + try { + byteCopy(backupHooks, hooksTarget); + } catch (err) { + process.stderr.write(`test-evolve restore hooks: ${err}\n`); + } + } +} + +/** Restore only hooks.json from a backup dir (independent restore for finally blocks). */ +export function restoreHooksJson(backupDir: string): void { + const manifestPath = join(backupDir, 'manifest.json'); + let hooksTarget = getInstalledHooksPath(); + if (existsSync(manifestPath)) { + try { + const m = JSON.parse(readFileSync(manifestPath, 'utf-8')) as BackupManifest; + if (m.hooksSource) hooksTarget = m.hooksSource; + } catch { + /* fall through */ + } + } + const backupHooks = join(backupDir, 'hooks.json'); + if (existsSync(backupHooks)) { + try { + byteCopy(backupHooks, hooksTarget); + } catch (err) { + process.stderr.write(`test-evolve restoreHooksJson: ${err}\n`); + } + } +} + +/** Find the most recent backup directory (lexicographic timestamp sort). 
*/ +export function getLatestBackup(): string | null { + const base = join(DATA_DIR, 'test-backup'); + if (!existsSync(base)) return null; + try { + const entries = readdirSync(base) + .map((name) => ({ name, full: join(base, name) })) + .filter((e) => statSync(e.full).isDirectory()) + .sort((a, b) => b.name.localeCompare(a.name)); + return entries[0]?.full ?? null; + } catch { + return null; + } +} + +/** + * Rewrite hooks.json so all plugin paths point at the worktree. + * + * Detects both forms: + * - Template: `${CLAUDE_PLUGIN_ROOT}` / `${CLAUDE_PLUGIN_DATA}` present + * - Baked: absolute path prefix (post-install form, where each hook + * command has the plugin's install directory inlined as a literal path) + * + * Writes the rewritten content to the same path. The ORIGINAL is preserved in + * the backup dir via `createBackup()`, so callers MUST create a backup first. + */ +export function swapHooksJson(worktreePath: string): { mode: 'template' | 'baked' | 'noop'; hooksPath: string } { + const hooksPath = getInstalledHooksPath(); + if (!existsSync(hooksPath)) { + return { mode: 'noop', hooksPath }; + } + const original = readFileSync(hooksPath, 'utf-8'); + + // Template form first — preserve parity with postinstall.ts:bakeHookPaths() + if (original.includes('${CLAUDE_PLUGIN_ROOT}') || original.includes('${CLAUDE_PLUGIN_DATA}')) { + const rewritten = original + .replace(/\$\{CLAUDE_PLUGIN_ROOT\}/g, worktreePath) + .replace(/\$\{CLAUDE_PLUGIN_DATA\}/g, worktreePath); + writeFileSync(hooksPath, rewritten, 'utf-8'); + return { mode: 'template', hooksPath }; + } + + // Baked form: extract the common plugin-root prefix and replace. + // Heuristic: find the first baked absolute path matching `/…/hooks/hooks.json` + // or `/…/bin/tsx-resolve.sh` and extract its directory prefix. 
+ const m = original.match(/"((?:\/[^"\s$]+))\/bin\/tsx-resolve\.sh"/); + if (m?.[1]) { + const bakedRoot = m[1]; + if (bakedRoot !== worktreePath) { + // Escape regex metacharacters in bakedRoot + const escaped = bakedRoot.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + const rewritten = original.replace(new RegExp(escaped, 'g'), worktreePath); + writeFileSync(hooksPath, rewritten, 'utf-8'); + return { mode: 'baked', hooksPath }; + } + } + + return { mode: 'noop', hooksPath }; +} diff --git a/src/test-evolve/tmux-driver.ts b/src/test-evolve/tmux-driver.ts new file mode 100644 index 00000000..f3c88eae --- /dev/null +++ b/src/test-evolve/tmux-driver.ts @@ -0,0 +1,191 @@ +/** + * tmux-driver.ts — Thin TypeScript wrapper over the tmux CLI. + * + * Called from the single orchestrator process (no tsx boot per pane). + * Each spawned pane runs the `claude` binary directly; this module only + * shells out to tmux via `child_process.execFileSync` / `spawn`. + */ +import { execFileSync, spawn } from 'child_process'; +import { mkdtempSync, mkdirSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; + +export interface SpawnPaneOpts { + sessionName: string; + windowName?: string; + envVars: Record; + cwd: string; + command: string; // shell command line to run in the pane (e.g. `claude -p "..."`) +} + +export interface PaneHandle { + sessionName: string; + paneId: string; // tmux pane id like `%12` + configDir: string; // `CLAUDE_CONFIG_DIR` passed to the pane +} + +/** + * Verify tmux is installed. Exits with clear error if not (AC17). + */ +export function checkTmux(): void { + try { + execFileSync('tmux', ['-V'], { stdio: 'pipe' }); + } catch { + process.stderr.write( + 'test-evolve: tmux CLI not found. Install with e.g. 
`sudo apt install tmux` or `brew install tmux`.\n', + ); + process.exit(1); + } +} + +function tmuxCall(args: string[]): string { + try { + return execFileSync('tmux', args, { stdio: ['ignore', 'pipe', 'pipe'] }).toString('utf-8'); + } catch (err: any) { + const stderr = err?.stderr?.toString?.('utf-8') ?? ''; + throw new Error(`tmux ${args.join(' ')} failed: ${stderr || err.message}`); + } +} + +function tmuxCallSafe(args: string[]): string | null { + try { + return tmuxCall(args); + } catch { + return null; + } +} + +/** + * Create a fresh CLAUDE_CONFIG_DIR tempdir for the scenario. + * Returns `<tempdir>/.claude` — caller seeds state under this path. + */ +export function makeScenarioConfigDir(scenarioName: string): string { + const prefix = join(tmpdir(), `tkm-test-evolve-${scenarioName}-`); + const base = mkdtempSync(prefix); + const configDir = join(base, '.claude'); + mkdirSync(configDir, { recursive: true }); + return configDir; +} + +/** + * Spawn a new tmux session with a single pane. The pane inherits + * `opts.envVars` (including `CLAUDE_CONFIG_DIR`). Returns a handle with the pane id. + */ +export function spawnPane(opts: SpawnPaneOpts): PaneHandle { + // Kill any pre-existing session with the same name (idempotent) + tmuxCallSafe(['kill-session', '-t', opts.sessionName]); + + // Build env arg list for `new-session`: `-e KEY=VAL` pairs + const envArgs: string[] = []; + for (const [k, v] of Object.entries(opts.envVars)) { + envArgs.push('-e', `${k}=${v}`); + } + + // Create a detached session running the command. If command is empty, spawn a shell. + const args = [ + 'new-session', + '-d', + '-s', + opts.sessionName, + ...(opts.windowName ?
['-n', opts.windowName] : []), + '-c', + opts.cwd, + ...envArgs, + opts.command, + ]; + tmuxCall(args); + + // Resolve pane id — first pane of the session + const paneId = tmuxCall(['list-panes', '-t', opts.sessionName, '-F', '#{pane_id}']).trim().split('\n')[0]; + if (!paneId) { + throw new Error(`spawnPane: no pane id returned for session ${opts.sessionName}`); + } + + return { + sessionName: opts.sessionName, + paneId, + configDir: opts.envVars.CLAUDE_CONFIG_DIR ?? '', + }; +} + +/** + * Capture the pane's plaintext buffer (ANSI stripped by default via `-p` + * without `-e`). Returns the most recent visible buffer. + */ +export function capturePane(paneId: string, opts?: { history?: boolean }): string { + const args = ['capture-pane', '-p', '-t', paneId]; + if (opts?.history) { + // `-S -` means start of history + args.push('-S', '-'); + } + return tmuxCallSafe(args) ?? ''; +} + +/** + * Send keys to the pane. By default appends Enter. For AskUserQuestion + * injection, pass a numeric index (1-4) or literal string for Other. + */ +export function sendKeys(paneId: string, keys: string, opts?: { enter?: boolean }): void { + const sendEnter = opts?.enter !== false; + tmuxCall(['send-keys', '-t', paneId, keys]); + if (sendEnter) { + tmuxCall(['send-keys', '-t', paneId, 'Enter']); + } +} + +/** + * Kill a pane. Idempotent — does not throw if the pane is already dead. + */ +export function killPane(paneId: string): void { + tmuxCallSafe(['kill-pane', '-t', paneId]); +} + +/** Kill an entire session (all panes + window). Idempotent. */ +export function killSession(sessionName: string): void { + tmuxCallSafe(['kill-session', '-t', sessionName]); +} + +/** + * Poll `capturePane` at `intervalMs` until `regex` matches the captured + * text or `timeoutMs` elapses. Returns the match or null on timeout. 
+ */ +export async function waitForPattern( + paneId: string, + regex: RegExp, + timeoutMs = 120_000, + intervalMs = 2_000, +): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + const text = capturePane(paneId, { history: true }); + const match = text.match(regex); + if (match) return match; + await sleep(intervalMs); + } + return null; +} + +/** + * Default AskUserQuestion UI detection regex. Matches numbered option prefixes + * rendered by Claude Code's question UI (e.g. ` 1. Vaporeon`, `2. …`). + */ +export const ASK_USER_QUESTION_UI_REGEX = /^\s*[1-4]\.\s/m; + +function sleep(ms: number): Promise { + return new Promise((r) => setTimeout(r, ms)); +} + +/** + * Helper: detached spawn with stdio ignored (used for spawning non-tmux + * helper processes). Exported for symmetry; not used in the orchestrator + * directly but available for manual smoke checks. + */ +export function spawnDetached(command: string, args: string[], env: Record): number { + const child = spawn(command, args, { + detached: true, + stdio: 'ignore', + env: { ...process.env, ...env }, + }); + child.unref(); + return child.pid ?? -1; +} diff --git a/src/test-evolve/verify.ts b/src/test-evolve/verify.ts new file mode 100644 index 00000000..86620eb3 --- /dev/null +++ b/src/test-evolve/verify.ts @@ -0,0 +1,244 @@ +/** + * verify.ts — 3-layer assertion module for test-evolve scenarios.
+ * + * Layer 1 (UI): regex on `tmux capture-pane` plaintext matches + * `scenario.expected_block.reason_contains` + * Layer 2 (Tool): captured pane text contains `tokenmon evolve <from> <to>` + * Layer 3 (State): post-run state.json/config.json match + * `scenario.expected_after` assertions + */ +import { existsSync, readFileSync } from 'fs'; +import { join } from 'path'; + +export interface Scenario { + name: string; + description: string; + seed: { + party: string[]; + pokemon: Record; + unlocked: string[]; + }; + expected_block: { + decision: string; + reason_contains: string[]; + }; + expected_choice: string; + expected_after: Record; +} + +export interface LayerResult { + pass: boolean; + detail: string; +} + +export interface StateDiffEntry { + field: string; + expected: unknown; + actual: unknown; +} + +export interface VerifyResult { + scenario: string; + pass: boolean; + layer_ui: LayerResult; + layer_tool: LayerResult; + layer_state: LayerResult & { diffs: StateDiffEntry[] }; +} + +/** + * Layer 1 — UI render assertion. Every `reason_contains` fragment must appear + * (case-insensitive substring) somewhere in the captured text. + */ +export function verifyUI(capturedText: string, scenario: Scenario): LayerResult { + const lowered = capturedText.toLowerCase(); + const missed: string[] = []; + const matched: string[] = []; + for (const frag of scenario.expected_block.reason_contains) { + if (lowered.includes(frag.toLowerCase())) { + matched.push(frag); + } else { + missed.push(frag); + } + } + return { + pass: missed.length === 0, + detail: missed.length === 0 + ? `all ${matched.length} fragments matched` + : `missed: ${missed.join(', ')}`, + }; +} + +/** + * Layer 2 — tool call assertion. Looks for `tokenmon evolve <from> <to>` + * (or, as a fallback, any numeric id in place of `<from>`) in the captured text. `<from>` is any party + * member that was evolution-ready in the seed; `<to>` is `expected_choice` + * when it is a numeric pokemon id.
+ */ +export function verifyToolCall(capturedText: string, scenario: Scenario): LayerResult { + const readyFrom = Object.keys(scenario.seed.pokemon).filter( + (k) => scenario.seed.pokemon[k]?.evolution_ready, + ); + const choice = scenario.expected_choice; + const isNumericChoice = /^\d+$/.test(choice); + + // If user refused (non-numeric choice like "no"), we don't expect a + // `tokenmon evolve` call — success is absence of the call. + if (!isNumericChoice) { + const absent = !/\btokenmon\s+evolve\s+/.test(capturedText); + return { + pass: absent, + detail: absent ? 'no tokenmon evolve call (as expected for refuse)' : 'unexpected tokenmon evolve call found', + }; + } + + // Look for `tokenmon evolve ` with any of the ready from ids + for (const from of readyFrom) { + const re = new RegExp(`tokenmon\\s+evolve\\s+${from}\\s+${choice}`); + if (re.test(capturedText)) { + return { pass: true, detail: `found: tokenmon evolve ${from} ${choice}` }; + } + } + // Fallback: accept `tokenmon evolve ` (evolve without from id) + const fallback = new RegExp(`tokenmon\\s+evolve\\s+\\d+\\s+${choice}`); + if (fallback.test(capturedText)) { + return { pass: true, detail: `found fallback: tokenmon evolve … ${choice}` }; + } + return { pass: false, detail: `no tokenmon evolve call matching ${readyFrom.join('|')} → ${choice}` }; +} + +interface ReadableState { + pokemon?: Record; + unlocked?: string[]; + [k: string]: any; +} + +interface ReadableConfig { + party?: string[]; + [k: string]: any; +} + +function readJsonSafe(path: string): T | null { + if (!existsSync(path)) return null; + try { + return JSON.parse(readFileSync(path, 'utf-8')) as T; + } catch { + return null; + } +} + +/** + * Layer 3 — state diff. Reads state.json and config.json from the scenario's + * tempdir (configDirOverride) or from the live CLAUDE_DIR, then compares each + * `expected_after` field. 
+ * + * Supported field forms: + * `pokemon.<id>.<field>` — equality (null = field absent) + * `unlocked.includes` — array of ids that MUST be present in state.unlocked + * `unlocked.excludes` — array of ids that MUST NOT be present + * `party.includes` — array of ids that MUST be present in config.party + */ +export function verifyState( + scenario: Scenario, + gen: string, + configDirOverride?: string, +): LayerResult & { diffs: StateDiffEntry[] } { + const base = configDirOverride ?? process.env.CLAUDE_CONFIG_DIR ?? ''; + const tokenmonDir = base ? join(base, 'tokenmon', gen) : ''; + const statePath = tokenmonDir ? join(tokenmonDir, 'state.json') : ''; + const configPath = tokenmonDir ? join(tokenmonDir, 'config.json') : ''; + + const state = readJsonSafe(statePath) ?? {}; + const config = readJsonSafe(configPath) ?? {}; + + const diffs: StateDiffEntry[] = []; + + for (const [field, expected] of Object.entries(scenario.expected_after)) { + if (field.startsWith('pokemon.')) { + const parts = field.split('.'); + const id = parts[1]; + const key = parts.slice(2).join('.'); + const p = state.pokemon?.[id]; + const actual = p ? getByPath(p, key) : undefined; + if (!deepEqualOrNull(actual, expected)) { + diffs.push({ field, expected, actual }); + } + } else if (field === 'unlocked.includes') { + const arr = Array.isArray(expected) ? expected : []; + const unlocked = state.unlocked ?? []; + for (const id of arr) { + if (!unlocked.includes(id)) { + diffs.push({ field: `unlocked.includes[${id}]`, expected: true, actual: false }); + } + } + } else if (field === 'unlocked.excludes') { + const arr = Array.isArray(expected) ? expected : []; + const unlocked = state.unlocked ?? []; + for (const id of arr) { + if (unlocked.includes(id)) { + diffs.push({ field: `unlocked.excludes[${id}]`, expected: false, actual: true }); + } + } + } else if (field === 'party.includes') { + const arr = Array.isArray(expected) ? expected : []; + const party = config.party ??
[]; + for (const id of arr) { + if (!party.includes(id)) { + diffs.push({ field: `party.includes[${id}]`, expected: true, actual: false }); + } + } + } else { + // Generic top-level field compare + const actual = (state as any)[field]; + if (!deepEqualOrNull(actual, expected)) { + diffs.push({ field, expected, actual }); + } + } + } + + return { + pass: diffs.length === 0, + detail: diffs.length === 0 ? 'all state assertions passed' : `${diffs.length} diff(s)`, + diffs, + }; +} + +/** Run all 3 layers and aggregate into a VerifyResult. */ +export function verifyScenario( + capturedText: string, + scenario: Scenario, + gen: string, + configDirOverride?: string, +): VerifyResult { + const layer_ui = verifyUI(capturedText, scenario); + const layer_tool = verifyToolCall(capturedText, scenario); + const layer_state = verifyState(scenario, gen, configDirOverride); + return { + scenario: scenario.name, + pass: layer_ui.pass && layer_tool.pass && layer_state.pass, + layer_ui, + layer_tool, + layer_state, + }; +} + +// ── helpers ── + +function getByPath(obj: any, path: string): unknown { + if (!path) return obj; + const parts = path.split('.'); + let cur = obj; + for (const p of parts) { + if (cur == null) return undefined; + cur = cur[p]; + } + return cur; +} + +/** + * Equality with null-coalesced undefined. `null` in `expected` means the field + * should be absent/undefined in actual. 
+ */ +function deepEqualOrNull(actual: unknown, expected: unknown): boolean { + if (expected === null) return actual === undefined || actual === null; + return JSON.stringify(actual) === JSON.stringify(expected); +} diff --git a/src/test-scenarios/accept-clear-reprompt.json b/src/test-scenarios/accept-clear-reprompt.json new file mode 100644 index 00000000..4040bb46 --- /dev/null +++ b/src/test-scenarios/accept-clear-reprompt.json @@ -0,0 +1,31 @@ +{ + "name": "accept-clear-reprompt", + "description": "User accepts evolve — evolution_prompt_shown cleared on new pokemon key (tokenmon.ts:1078)", + "seed": { + "party": ["4"], + "pokemon": { + "4": { + "id": 4, + "level": 16, + "xp": 4096, + "friendship": 70, + "evolution_ready": true, + "evolution_options": ["5"], + "met": "starter" + } + }, + "unlocked": ["4"] + }, + "expected_block": { + "decision": "block", + "reason_contains": ["Charmander", "AskUserQuestion", "tokenmon evolve"] + }, + "expected_choice": "5", + "expected_after": { + "pokemon.5.met": "evolution", + "pokemon.5.evolution_prompt_shown": null, + "unlocked.includes": ["5"], + "party.includes": ["5"], + "pokemon.4.evolution_prompt_shown": null + } +} diff --git a/src/test-scenarios/branch-eevee.json b/src/test-scenarios/branch-eevee.json new file mode 100644 index 00000000..e6879827 --- /dev/null +++ b/src/test-scenarios/branch-eevee.json @@ -0,0 +1,31 @@ +{ + "name": "branch-eevee", + "description": "Eevee (#133) branch evolution — user picks Vaporeon (#134) from 8 options", + "seed": { + "party": ["133"], + "pokemon": { + "133": { + "id": 133, + "level": 25, + "xp": 15625, + "friendship": 220, + "evolution_ready": true, + "evolution_options": ["134", "135", "136", "196", "197", "470", "471", "700"], + "met": "starter" + } + }, + "unlocked": ["133"] + }, + "expected_block": { + "decision": "block", + "reason_contains": ["Eevee", "AskUserQuestion", "tokenmon evolve"] + }, + "expected_choice": "134", + "expected_after": { + "pokemon.134.met": "evolution", 
+ "unlocked.includes": ["134"], + "unlocked.excludes": [], + "party.includes": ["134"], + "pokemon.133.evolution_prompt_shown": null + } +} diff --git a/src/test-scenarios/multi-3.json b/src/test-scenarios/multi-3.json new file mode 100644 index 00000000..38d9aa13 --- /dev/null +++ b/src/test-scenarios/multi-3.json @@ -0,0 +1,49 @@ +{ + "name": "multi-3", + "description": "3 pokemon simultaneously evolution-ready — batch block (stop.ts slice(0,4))", + "seed": { + "party": ["4", "7", "25"], + "pokemon": { + "4": { + "id": 4, + "level": 16, + "xp": 4096, + "friendship": 70, + "evolution_ready": true, + "evolution_options": ["5"], + "met": "starter" + }, + "7": { + "id": 7, + "level": 16, + "xp": 4096, + "friendship": 70, + "evolution_ready": true, + "evolution_options": ["8"], + "met": "wild" + }, + "25": { + "id": 25, + "level": 16, + "xp": 4096, + "friendship": 220, + "evolution_ready": true, + "evolution_options": ["26"], + "met": "wild" + } + }, + "unlocked": ["4", "7", "25"] + }, + "expected_block": { + "decision": "block", + "reason_contains": ["Charmander", "Squirtle", "Pikachu", "AskUserQuestion"] + }, + "expected_choice": "5", + "expected_after": { + "pokemon.5.met": "evolution", + "unlocked.includes": ["5"], + "unlocked.excludes": [], + "party.includes": ["5"], + "pokemon.4.evolution_prompt_shown": null + } +} diff --git a/src/test-scenarios/overflow-5.json b/src/test-scenarios/overflow-5.json new file mode 100644 index 00000000..42bc3167 --- /dev/null +++ b/src/test-scenarios/overflow-5.json @@ -0,0 +1,67 @@ +{ + "name": "overflow-5", + "description": "5 pokemon evolution-ready — only first 4 prompted this turn (batch cap at slice(0,4))", + "seed": { + "party": ["4", "7", "25", "133", "172"], + "pokemon": { + "4": { + "id": 4, + "level": 16, + "xp": 4096, + "friendship": 70, + "evolution_ready": true, + "evolution_options": ["5"], + "met": "starter" + }, + "7": { + "id": 7, + "level": 16, + "xp": 4096, + "friendship": 70, + "evolution_ready": true, + 
"evolution_options": ["8"], + "met": "wild" + }, + "25": { + "id": 25, + "level": 20, + "xp": 8000, + "friendship": 220, + "evolution_ready": true, + "evolution_options": ["26"], + "met": "wild" + }, + "133": { + "id": 133, + "level": 25, + "xp": 15625, + "friendship": 220, + "evolution_ready": true, + "evolution_options": ["134", "135", "136"], + "met": "wild" + }, + "172": { + "id": 172, + "level": 15, + "xp": 3375, + "friendship": 220, + "evolution_ready": true, + "evolution_options": ["25"], + "met": "wild" + } + }, + "unlocked": ["4", "7", "25", "133", "172"] + }, + "expected_block": { + "decision": "block", + "reason_contains": ["AskUserQuestion", "tokenmon evolve"] + }, + "expected_choice": "5", + "expected_after": { + "pokemon.5.met": "evolution", + "unlocked.includes": ["5"], + "unlocked.excludes": [], + "party.includes": ["5"], + "pokemon.172.evolution_prompt_shown": null + } +} diff --git a/src/test-scenarios/refuse-persist.json b/src/test-scenarios/refuse-persist.json new file mode 100644 index 00000000..88c23384 --- /dev/null +++ b/src/test-scenarios/refuse-persist.json @@ -0,0 +1,30 @@ +{ + "name": "refuse-persist", + "description": "User refuses evolution via Other:no — evolution_prompt_shown set, no re-prompt next turn", + "seed": { + "party": ["4"], + "pokemon": { + "4": { + "id": 4, + "level": 16, + "xp": 4096, + "friendship": 70, + "evolution_ready": true, + "evolution_options": ["5"], + "met": "starter" + } + }, + "unlocked": ["4"] + }, + "expected_block": { + "decision": "block", + "reason_contains": ["Charmander", "AskUserQuestion"] + }, + "expected_choice": "no", + "expected_after": { + "pokemon.4.evolution_prompt_shown": true, + "pokemon.4.evolution_ready": true, + "unlocked.excludes": ["5"], + "party.includes": ["4"] + } +} diff --git a/src/test-scenarios/single-charmander.json b/src/test-scenarios/single-charmander.json new file mode 100644 index 00000000..965ea781 --- /dev/null +++ b/src/test-scenarios/single-charmander.json @@ -0,0 +1,31 
@@ +{ + "name": "single-charmander", + "description": "Charmander (#4) single-chain evolution — user accepts Charmeleon (#5)", + "seed": { + "party": ["4"], + "pokemon": { + "4": { + "id": 4, + "level": 16, + "xp": 4096, + "friendship": 70, + "evolution_ready": true, + "evolution_options": ["5"], + "met": "starter" + } + }, + "unlocked": ["4"] + }, + "expected_block": { + "decision": "block", + "reason_contains": ["Charmander", "AskUserQuestion", "tokenmon evolve"] + }, + "expected_choice": "5", + "expected_after": { + "pokemon.5.met": "evolution", + "unlocked.includes": ["5"], + "unlocked.excludes": [], + "party.includes": ["5"], + "pokemon.4.evolution_prompt_shown": null + } +} diff --git a/test/e2e/evolve-askuserquestion.test.ts b/test/e2e/evolve-askuserquestion.test.ts index 1569825a..b25de847 100644 --- a/test/e2e/evolve-askuserquestion.test.ts +++ b/test/e2e/evolve-askuserquestion.test.ts @@ -17,12 +17,13 @@ import { describe, it, before, after } from 'node:test'; import assert from 'node:assert/strict'; -import { execFileSync, spawnSync } from 'node:child_process'; +import { spawnSync } from 'node:child_process'; import { mkdtempSync, rmSync, writeFileSync, readFileSync, existsSync, mkdirSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { dirname, join } from 'node:path'; import { fileURLToPath } from 'node:url'; import { makeState, makeConfig } from '../helpers.js'; +import type { State, Config } from '../../src/core/types.js'; const __dirname = dirname(fileURLToPath(import.meta.url)); const REPO_ROOT = join(__dirname, '..', '..'); @@ -61,7 +62,7 @@ function runStopHook(dataDir: string, stdinJson: string): RunOutput { }; } -function seedState(dataDir: string, gen: string, stateOverrides: any, configOverrides: any): void { +function seedState(dataDir: string, gen: string, stateOverrides: Partial, configOverrides: Partial): void { const genDir = join(dataDir, 'tokenmon', gen); mkdirSync(genDir, { recursive: true }); const state = 
makeState(stateOverrides); From 6a6984aebac4bf86ec39a40ed469d69bbfa41d7f Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 15:01:26 +0900 Subject: [PATCH 04/14] refactor(test-evolve): simplify harness to manual user-driven flow Drop all tmux automation, Claude Code spawning, and UI/tool-level asserts. Claude Code's Ink-based REPL does not accept tmux send-keys submissions, which made the "spawn a child session and auto-answer AskUserQuestion" path unreliable and expensive to debug. The simpler and more useful shape is: backup, seed, swap hooks.json, let the human trigger the prompt in their own live session, then verify state and restore. CLI subcommands are now: tokenmon test-evolve --list list all scenarios tokenmon test-evolve --setup <name> backup + seed + swap tokenmon test-evolve --verify state diff vs expected tokenmon test-evolve --restore byte-level restore A tiny current.json pointer under .tokenmon/test-backup/ lets --verify and --restore work without passing the scenario name again. - cli/test-evolve.ts: 463 -> 156 lines, no tmux/spawn/waitForPattern - test-evolve/verify.ts: 245 -> 115 lines, state-only assertions - test-evolve/tmux-driver.ts: deleted - skills/test-evolve/SKILL.md: rewritten for manual dispatch Also pick up two earlier fixes needed to make the setup path actually work end-to-end: - backup.ts swapHooksJson: regex now accepts JSON-escaped `\"` surrounding baked absolute paths so hooks.json rewrites apply when the user runs with baked (post-install) hook paths - backup.ts: drop the hardcoded `/home/minsiwon00/...` fallback and resolve the active hooks.json via PLUGIN_ROOT + a marketplace fallback, throwing when neither exists instead of writing to a non-existent path Verified: typecheck passes; 1203/1203 tests pass; round-trip --setup branch-eevee then --restore produces byte-identical state, config, and hooks files.
Co-Authored-By: Claude Opus 4.7 (1M context) --- skills/test-evolve/SKILL.md | 36 +-- src/cli/test-evolve.ts | 419 +++++++++------------------------ src/test-evolve/backup.ts | 7 +- src/test-evolve/tmux-driver.ts | 191 --------------- src/test-evolve/verify.ts | 188 +++------------ 5 files changed, 167 insertions(+), 674 deletions(-) delete mode 100644 src/test-evolve/tmux-driver.ts diff --git a/skills/test-evolve/SKILL.md b/skills/test-evolve/SKILL.md index 22e93d0c..78ec0a7d 100644 --- a/skills/test-evolve/SKILL.md +++ b/skills/test-evolve/SKILL.md @@ -1,28 +1,32 @@ --- -description: "Dev-only: E2E test harness for the evolution AskUserQuestion flow via tmux. Runs 6 scenarios in isolated Claude Code sessions and reports 3-layer verify results." +description: "Dev-only: manual test harness for the evolution AskUserQuestion flow. Backs up state, seeds a scenario party, and lets the user trigger the Stop-hook evolution prompt in their live session." --- -Run the dev-only `test-evolve` E2E harness. This is NOT shipped in the released plugin — it exercises the Stop-hook evolution block path end-to-end by spawning real Claude Code sessions inside tmux panes. +Dev-only test harness for the evolution AskUserQuestion flow. No tmux, no spawning — the user triggers the evolution prompt manually in this live session. 
```bash P="${CLAUDE_PLUGIN_ROOT:-$(ls -d ~/.claude/plugins/marketplaces/tkm 2>/dev/null || ls -d ~/.claude/plugins/cache/tkm/tkm/*/ 2>/dev/null | sort -V | tail -1)}" -"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" ${ARGUMENTS} ``` -## Usage +## Dispatch -| Command | Description | -|---------|-------------| -| `/tkm:test-evolve` | Run all 6 scenarios sequentially (tmux + real LLM cost) | -| `/tkm:test-evolve --scenario branch-eevee` | Run a single scenario by name | -| `/tkm:test-evolve --dry-run` | Validate scenarios + tmux, no LLM cost | -| `/tkm:test-evolve --restore` | Restore from latest backup and exit | +- If `$ARGUMENTS` is `--list` or `--verify` or `--restore` or `--help`: + run `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" ${ARGUMENTS}`, show output, stop. + +- Otherwise treat `$ARGUMENTS` as a scenario name and run setup: + `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --setup ${ARGUMENTS}` -## What it does + After setup succeeds, tell the user: -1. Backs up the user's live `state.json`, `config.json`, and installed `hooks/hooks.json` to `.tokenmon/test-backup//`. -2. Rewrites `hooks.json` so hooks point at the worktree under test (dual-format: baked absolute OR `${CLAUDE_PLUGIN_ROOT}` template). -3. For each scenario: spawns a tmux pane with an isolated `CLAUDE_CONFIG_DIR`, seeds party state, launches `claude`, detects the AskUserQuestion UI, injects the expected choice via `tmux send-keys`, and runs 3-layer verification (UI regex + tool-call match + state diff). -4. On completion (or crash, or Ctrl+C) restores the backup byte-for-byte. + > Party seeded for scenario **${ARGUMENTS}**. Send any short message to trigger the evolution prompt. When done: + > - `/tkm:test-evolve --verify` — check state vs expected_after + > - `/tkm:test-evolve --restore` — restore backup and clean up -Show the output table to the user. Any `FAIL` rows include the failing layer and diff detail. 
+## Usage + +| Command | Description | +|---------|-------------| +| `/tkm:test-evolve branch-eevee` | Seed Eevee branch-evolution scenario | +| `/tkm:test-evolve --list` | List all 6 scenarios | +| `/tkm:test-evolve --verify` | Compare live state vs expected_after | +| `/tkm:test-evolve --restore` | Restore backup, remove current.json | diff --git a/src/cli/test-evolve.ts b/src/cli/test-evolve.ts index 562b55a6..9c52d323 100644 --- a/src/cli/test-evolve.ts +++ b/src/cli/test-evolve.ts @@ -1,366 +1,157 @@ #!/usr/bin/env -S npx tsx /** - * test-evolve.ts — Dev-only E2E test harness orchestrator for the evolution - * AskUserQuestion flow. + * test-evolve.ts — Dev-only manual test harness for the evolution AskUserQuestion flow. * * Subcommands: - * (default) run all 6 scenarios sequentially - * --scenario run a single scenario by name - * --restore restore from the latest backup and exit - * --dry-run validate scenarios + check tmux, no LLM cost + * --list list all scenarios + * --setup backup, swap hooks.json, seed state/config + * --verify compare live state vs scenario expected_after + * --restore restore backup and clean up current.json * --help print usage - * - * Global try/finally ensures state is restored even on crash or SIGINT. 
*/ -import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'fs'; +import { existsSync, mkdirSync, readFileSync, readdirSync, unlinkSync, writeFileSync } from 'fs'; import { fileURLToPath } from 'url'; import { dirname, join, resolve } from 'path'; -import { getActiveGeneration } from '../core/paths.js'; -import { - createBackup, - getLatestBackup, - restoreBackup, - restoreHooksJson, - swapHooksJson, - type BackupManifest, -} from '../test-evolve/backup.js'; -import { - ASK_USER_QUESTION_UI_REGEX, - capturePane, - checkTmux, - killSession, - makeScenarioConfigDir, - sendKeys, - spawnPane, - waitForPattern, -} from '../test-evolve/tmux-driver.js'; -import { verifyScenario, type Scenario, type VerifyResult } from '../test-evolve/verify.js'; +import { DATA_DIR, configPath, getActiveGeneration, statePath } from '../core/paths.js'; +import { createBackup, restoreBackup, swapHooksJson } from '../test-evolve/backup.js'; +import { verifyState, type Scenario } from '../test-evolve/verify.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); const REPO_ROOT = resolve(__dirname, '..', '..'); const SCENARIOS_DIR = join(REPO_ROOT, 'src', 'test-scenarios'); +const CURRENT_PTR = join(DATA_DIR, 'test-backup', 'current.json'); -// ── CLI parsing ── - -interface CliArgs { - help: boolean; - dryRun: boolean; - restore: boolean; - scenario: string | null; -} - -function parseArgs(argv: string[]): CliArgs { - const args: CliArgs = { help: false, dryRun: false, restore: false, scenario: null }; - for (let i = 0; i < argv.length; i++) { - const a = argv[i]; - switch (a) { - case '--help': - case '-h': - args.help = true; - break; - case '--dry-run': - args.dryRun = true; - break; - case '--restore': - args.restore = true; - break; - case '--scenario': - args.scenario = argv[++i] ?? 
null; - break; - default: - if (a.startsWith('--')) { - process.stderr.write(`test-evolve: unknown flag ${a}\n`); - } - } - } - return args; -} - -function printHelp(): void { - process.stdout.write( - [ - 'test-evolve — dev-only E2E harness for the evolution AskUserQuestion flow', - '', - 'Usage:', - ' test-evolve Run all 6 scenarios sequentially', - ' test-evolve --scenario Run a single scenario by name', - ' test-evolve --restore Restore from latest backup and exit', - ' test-evolve --dry-run Validate scenarios + tmux, no LLM cost', - ' test-evolve --help Show this help', - '', - 'Scenarios:', - ' branch-eevee, single-charmander, multi-3, overflow-5,', - ' refuse-persist, accept-clear-reprompt', - '', - ].join('\n'), - ); -} +interface CurrentPtr { backupDir: string; scenario: string; gen: string } // ── Scenario loading ── function loadScenarios(): Scenario[] { - if (!existsSync(SCENARIOS_DIR)) { - throw new Error(`test-evolve: scenarios dir missing: ${SCENARIOS_DIR}`); - } - const files = readdirSync(SCENARIOS_DIR).filter((f) => f.endsWith('.json')); - return files.map((f) => JSON.parse(readFileSync(join(SCENARIOS_DIR, f), 'utf-8')) as Scenario); + if (!existsSync(SCENARIOS_DIR)) throw new Error(`test-evolve: scenarios dir missing: ${SCENARIOS_DIR}`); + return readdirSync(SCENARIOS_DIR) + .filter((f) => f.endsWith('.json')) + .map((f) => JSON.parse(readFileSync(join(SCENARIOS_DIR, f), 'utf-8')) as Scenario); } function loadScenarioByName(name: string): Scenario { const path = join(SCENARIOS_DIR, `${name}.json`); - if (!existsSync(path)) { - throw new Error(`test-evolve: scenario not found: ${name} (expected at ${path})`); - } + if (!existsSync(path)) throw new Error(`test-evolve: scenario not found: ${name} (expected at ${path})`); return JSON.parse(readFileSync(path, 'utf-8')) as Scenario; } // ── Seed writer ── -interface SeedState { - pokemon: Record; - unlocked: string[]; - [k: string]: any; -} - -interface SeedConfig { - party: string[]; - [k: string]: 
any; -} - -function writeSeed(configDir: string, gen: string, scenario: Scenario): void { - const tokenmonDir = join(configDir, 'tokenmon', gen); - mkdirSync(tokenmonDir, { recursive: true }); - - const state: SeedState = { - pokemon: scenario.seed.pokemon, - unlocked: scenario.seed.unlocked, - }; - const config: SeedConfig = { party: scenario.seed.party }; - - writeFileSync(join(tokenmonDir, 'state.json'), JSON.stringify(state, null, 2), 'utf-8'); - writeFileSync(join(tokenmonDir, 'config.json'), JSON.stringify(config, null, 2), 'utf-8'); -} - -// ── Scenario runner ── - -interface ScenarioResult { - name: string; - verify: VerifyResult | null; - error: string | null; - durationMs: number; - cost_estimate_usd: number; -} - -async function runScenario(scenario: Scenario, gen: string): Promise { - const start = Date.now(); - const sessionName = `tkm-test-${scenario.name}`; - const configDir = makeScenarioConfigDir(scenario.name); - let paneId: string | null = null; - - try { - writeSeed(configDir, gen, scenario); - - // Spawn a claude pane. Minimal prompt asks LLM to just say "ok" to - // trigger the Stop hook which will emit the block. - const claudePrompt = `just say ok. 
if you get an AskUserQuestion about pokemon evolution, pick option ${scenario.expected_choice}.`; - const handle = spawnPane({ - sessionName, - envVars: { - CLAUDE_CONFIG_DIR: configDir, - TOKENMON_HOOK_MODE: '1', - }, - cwd: REPO_ROOT, - command: `claude -p ${JSON.stringify(claudePrompt)}`, - }); - paneId = handle.paneId; - - // Wait for AskUserQuestion UI to render (numbered option prefixes) - const uiMatch = await waitForPattern(paneId, ASK_USER_QUESTION_UI_REGEX, 120_000); - if (!uiMatch) { - return { - name: scenario.name, - verify: null, - error: 'timeout waiting for AskUserQuestion UI', - durationMs: Date.now() - start, - cost_estimate_usd: 0.1, - }; - } - - // Inject choice - sendKeys(paneId, scenario.expected_choice); - - // Wait for evolution completion — look for `tokenmon evolve` call - // signature or scenario completion markers. - await waitForPattern(paneId, /tokenmon\s+evolve\s+\d+/, 60_000); - - const captured = capturePane(paneId, { history: true }); - const verify = verifyScenario(captured, scenario, gen, configDir); - - return { - name: scenario.name, - verify, - error: null, - durationMs: Date.now() - start, - cost_estimate_usd: 0.2, - }; - } catch (err: any) { - return { - name: scenario.name, - verify: null, - error: err?.message ?? String(err), - durationMs: Date.now() - start, - cost_estimate_usd: 0.05, - }; - } finally { - killSession(sessionName); - } +function writeSeed(gen: string, scenario: Scenario, backupDir: string): void { + const sfile = statePath(gen); + const cfile = configPath(gen); + mkdirSync(dirname(sfile), { recursive: true }); + mkdirSync(dirname(cfile), { recursive: true }); + + const sBackup = join(backupDir, 'state.json'); + const cBackup = join(backupDir, 'config.json'); + const baseState: Record = existsSync(sBackup) + ? JSON.parse(readFileSync(sBackup, 'utf-8')) : {}; + const baseConfig: Record = existsSync(cBackup) + ? 
JSON.parse(readFileSync(cBackup, 'utf-8')) : {}; + + writeFileSync(sfile, JSON.stringify({ ...baseState, pokemon: scenario.seed.pokemon, unlocked: scenario.seed.unlocked }, null, 2), 'utf-8'); + writeFileSync(cfile, JSON.stringify({ ...baseConfig, party: scenario.seed.party, starter_chosen: true }, null, 2), 'utf-8'); } -// ── Report ── +// ── Subcommands ── -function printReport(results: ScenarioResult[]): void { - const lines: string[] = []; - lines.push(''); - lines.push('┌──────────────────────────┬────────┬────────┬─────────┬────────┬──────────┐'); - lines.push('│ scenario │ result │ UI │ Tool │ State │ cost$ │'); - lines.push('├──────────────────────────┼────────┼────────┼─────────┼────────┼──────────┤'); - let totalCost = 0; - for (const r of results) { - const name = r.name.padEnd(24).slice(0, 24); - const overall = r.verify?.pass ? 'PASS ' : 'FAIL '; - const ui = r.verify?.layer_ui.pass ? 'ok ' : 'x '; - const tool = r.verify?.layer_tool.pass ? 'ok ' : 'x '; - const state = r.verify?.layer_state.pass ? 
'ok ' : 'x '; - const cost = `$${r.cost_estimate_usd.toFixed(2)}`.padEnd(8); - totalCost += r.cost_estimate_usd; - lines.push(`│ ${name} │ ${overall} │ ${ui} │ ${tool} │ ${state} │ ${cost} │`); - if (r.error) { - lines.push(`│ error: ${r.error.slice(0, 62).padEnd(62)} │`); - } - if (r.verify && !r.verify.pass) { - if (!r.verify.layer_ui.pass) lines.push(`│ UI: ${r.verify.layer_ui.detail.slice(0, 66).padEnd(66)} │`); - if (!r.verify.layer_tool.pass) lines.push(`│ Tool: ${r.verify.layer_tool.detail.slice(0, 64).padEnd(64)} │`); - if (!r.verify.layer_state.pass) { - for (const d of r.verify.layer_state.diffs.slice(0, 4)) { - const detail = `${d.field}: expected=${JSON.stringify(d.expected)} actual=${JSON.stringify(d.actual)}`; - lines.push(`│ State: ${detail.slice(0, 63).padEnd(63)} │`); - } - } - } +function doList(): void { + const scenarios = loadScenarios(); + process.stdout.write(`\ntest-evolve scenarios (${scenarios.length}):\n\n`); + for (const s of scenarios) { + const readyCount = Object.values(s.seed.pokemon).filter((p: any) => p?.evolution_ready).length; + process.stdout.write(` ${s.name.padEnd(26)} ${s.description}\n`); + process.stdout.write(` ${''.padEnd(26)} party=${s.seed.party.join(',')} ready=${readyCount} choice=${s.expected_choice}\n\n`); } - lines.push('└──────────────────────────┴────────┴────────┴─────────┴────────┴──────────┘'); - const passed = results.filter((r) => r.verify?.pass).length; - lines.push(`Total: ${passed}/${results.length} passed, estimated cost $${totalCost.toFixed(2)}`); - lines.push(''); - process.stdout.write(lines.join('\n')); } -// ── Subcommand implementations ── - -async function runAll(scenarios: Scenario[], gen: string, backup: BackupManifest): Promise { +function doSetup(scenarioName: string): void { + const gen = getActiveGeneration(); + const scenario = loadScenarioByName(scenarioName); + const backup = createBackup(gen); process.stdout.write(`test-evolve: backup @ ${backup.dir}\n`); - 
process.stdout.write(`test-evolve: swapping hooks.json -> ${REPO_ROOT}\n`); + const swap = swapHooksJson(REPO_ROOT); - process.stdout.write(`test-evolve: swap mode=${swap.mode} path=${swap.hooksPath}\n`); + process.stdout.write(`test-evolve: hooks.json swapped (mode=${swap.mode} path=${swap.hooksPath})\n`); - const results: ScenarioResult[] = []; - for (const s of scenarios) { - process.stdout.write(`\n── running ${s.name} ──\n`); - const r = await runScenario(s, gen); - results.push(r); - process.stdout.write(` done in ${r.durationMs}ms — ${r.verify?.pass ? 'PASS' : 'FAIL'}${r.error ? ` (${r.error})` : ''}\n`); - } - printReport(results); -} + writeSeed(gen, scenario, backup.dir); + process.stdout.write(`test-evolve: state seeded for scenario "${scenarioName}"\n`); -async function dryRun(scenarios: Scenario[], gen: string): Promise { - checkTmux(); - process.stdout.write(`test-evolve dry-run (gen=${gen})\n`); - process.stdout.write(`tmux: available\n`); - process.stdout.write(`scenarios loaded: ${scenarios.length}\n`); - for (const s of scenarios) { - const readyCount = Object.values(s.seed.pokemon).filter((p: any) => p?.evolution_ready).length; - process.stdout.write( - ` - ${s.name.padEnd(24)} party=${s.seed.party.length} ready=${readyCount} choice=${s.expected_choice}\n`, - ); - } - process.stdout.write(`\nNo LLM cost incurred. Run without --dry-run to execute.\n`); + mkdirSync(dirname(CURRENT_PTR), { recursive: true }); + const ptr: CurrentPtr = { backupDir: backup.dir, scenario: scenarioName, gen }; + writeFileSync(CURRENT_PTR, JSON.stringify(ptr, null, 2), 'utf-8'); + process.stdout.write(`test-evolve: pointer written to ${CURRENT_PTR}\n`); + process.stdout.write(`\nReady. 
Send any short message to trigger the evolution prompt.\nWhen done: tokenmon test-evolve --verify then tokenmon test-evolve --restore\n`); } -function doRestore(gen: string): void { - const latest = getLatestBackup(); - if (!latest) { - process.stderr.write('test-evolve --restore: no backup found under .tokenmon/test-backup/\n'); +function doVerify(): void { + if (!existsSync(CURRENT_PTR)) { + process.stderr.write('test-evolve --verify: no current.json found. Run --setup first.\n'); process.exit(1); } - process.stdout.write(`test-evolve: restoring from ${latest}\n`); - restoreBackup(latest, gen); - process.stdout.write(`test-evolve: restore complete\n`); -} - -// ── Main entry ── - -async function main(): Promise { - const args = parseArgs(process.argv.slice(2)); - if (args.help) { - printHelp(); - return; - } - - const gen = getActiveGeneration(); - - if (args.restore) { - doRestore(gen); - return; + const ptr = JSON.parse(readFileSync(CURRENT_PTR, 'utf-8')) as CurrentPtr; + const scenario = loadScenarioByName(ptr.scenario); + const result = verifyState(scenario, ptr.gen); + + process.stdout.write(`\ntest-evolve verify: scenario=${ptr.scenario}\n`); + for (const [field, expected] of Object.entries(scenario.expected_after)) { + const diff = result.diffs.find((d) => d.field === field || d.field.startsWith(`${field}[`)); + const pass = !diff; + process.stdout.write(` ${pass ? 'PASS' : 'FAIL'} ${field}: expected=${JSON.stringify(expected)}${diff ? ` actual=${JSON.stringify(diff.actual)}` : ''}\n`); } + process.stdout.write(`\nOverall: ${result.pass ? 'PASS' : `FAIL (${result.diffs.length} diff(s))`}\n\n`); + if (!result.pass) process.exit(1); +} - // Load scenarios early so --dry-run can validate them. - let scenarios: Scenario[]; - try { - scenarios = args.scenario ? [loadScenarioByName(args.scenario)] : loadScenarios(); - } catch (err: any) { - process.stderr.write(`${err?.message ?? 
err}\n`); +function doRestore(): void { + if (!existsSync(CURRENT_PTR)) { + process.stderr.write('test-evolve --restore: no current.json found. Nothing to restore.\n'); process.exit(1); } + const ptr = JSON.parse(readFileSync(CURRENT_PTR, 'utf-8')) as CurrentPtr; + restoreBackup(ptr.backupDir, ptr.gen); + process.stdout.write(`test-evolve: restored from ${ptr.backupDir}\n`); + unlinkSync(CURRENT_PTR); + process.stdout.write('test-evolve: current.json removed. Restore complete.\n'); +} - if (args.dryRun) { - await dryRun(scenarios, gen); - return; - } +function printHelp(): void { + process.stdout.write([ + 'test-evolve — dev-only manual harness for the evolution AskUserQuestion flow', + '', + 'Usage:', + ' tokenmon test-evolve --list list all scenarios', + ' tokenmon test-evolve --setup backup + seed + swap hooks.json', + ' tokenmon test-evolve --verify compare live state vs expected_after', + ' tokenmon test-evolve --restore restore backup, remove current.json', + ' tokenmon test-evolve --help show this help', + '', + ].join('\n')); +} - checkTmux(); - const backup = createBackup(gen); +// ── Main ── - // SIGINT handler — ensure restore even on Ctrl+C - const sigintHandler = () => { - process.stderr.write('\ntest-evolve: SIGINT — restoring backup before exit\n'); - try { - restoreBackup(backup.dir, gen); - restoreHooksJson(backup.dir); - } catch (err) { - process.stderr.write(`test-evolve: restore on SIGINT failed: ${err}\n`); - } - process.exit(130); - }; - process.on('SIGINT', sigintHandler); +function main(): void { + const argv = process.argv.slice(2); + const flag = argv[0]; - try { - await runAll(scenarios, gen, backup); - } finally { - process.off('SIGINT', sigintHandler); - try { - restoreBackup(backup.dir, gen); - restoreHooksJson(backup.dir); - process.stdout.write(`test-evolve: state restored from ${backup.dir}\n`); - } catch (err) { - process.stderr.write(`test-evolve: restore failed: ${err}\n`); - process.stderr.write(`test-evolve: manual recovery: 
test-evolve --restore\n`); - } + if (!flag || flag === '--help' || flag === '-h') { printHelp(); return; } + if (flag === '--list') { doList(); return; } + if (flag === '--setup') { + const name = argv[1]; + if (!name) { process.stderr.write('test-evolve --setup: scenario name required\n'); process.exit(1); } + doSetup(name); return; } -} + if (flag === '--verify') { doVerify(); return; } + if (flag === '--restore') { doRestore(); return; } -main().catch((err) => { - process.stderr.write(`test-evolve: fatal: ${err?.stack ?? err}\n`); + process.stderr.write(`test-evolve: unknown subcommand: ${flag}\nRun with --help for usage.\n`); process.exit(1); -}); +} + +main(); diff --git a/src/test-evolve/backup.ts b/src/test-evolve/backup.ts index 7b7ea76a..c2f0af52 100644 --- a/src/test-evolve/backup.ts +++ b/src/test-evolve/backup.ts @@ -191,9 +191,10 @@ export function swapHooksJson(worktreePath: string): { mode: 'template' | 'baked } // Baked form: extract the common plugin-root prefix and replace. - // Heuristic: find the first baked absolute path matching `/…/hooks/hooks.json` - // or `/…/bin/tsx-resolve.sh` and extract its directory prefix. - const m = original.match(/"((?:\/[^"\s$]+))\/bin\/tsx-resolve\.sh"/); + // Heuristic: find the first baked absolute path ending in `/bin/tsx-resolve.sh`. + // Terminator is NOT required to be `"` because hook command strings store the + // quote as the JSON escape `\"`, leaving `\` right after `.sh`. Accept anything. + const m = original.match(/(\/(?:[^"\s$\\]|\\(?!["\s$]))+)\/bin\/tsx-resolve\.sh/); if (m?.[1]) { const bakedRoot = m[1]; if (bakedRoot !== worktreePath) { diff --git a/src/test-evolve/tmux-driver.ts b/src/test-evolve/tmux-driver.ts deleted file mode 100644 index f3c88eae..00000000 --- a/src/test-evolve/tmux-driver.ts +++ /dev/null @@ -1,191 +0,0 @@ -/** - * tmux-driver.ts — Thin TypeScript wrapper over the tmux CLI. - * - * Called from the single orchestrator process (no tsx boot per pane). 
- * Each spawned pane runs the `claude` binary directly; this module only - * shells out to tmux via `child_process.execFileSync` / `spawn`. - */ -import { execFileSync, spawn } from 'child_process'; -import { mkdtempSync, mkdirSync } from 'fs'; -import { tmpdir } from 'os'; -import { join } from 'path'; - -export interface SpawnPaneOpts { - sessionName: string; - windowName?: string; - envVars: Record; - cwd: string; - command: string; // shell command line to run in the pane (e.g. `claude -p "..."`) -} - -export interface PaneHandle { - sessionName: string; - paneId: string; // tmux pane id like `%12` - configDir: string; // `CLAUDE_CONFIG_DIR` passed to the pane -} - -/** - * Verify tmux is installed. Exits with clear error if not (AC17). - */ -export function checkTmux(): void { - try { - execFileSync('tmux', ['-V'], { stdio: 'pipe' }); - } catch { - process.stderr.write( - 'test-evolve: tmux CLI not found. Install with e.g. `sudo apt install tmux` or `brew install tmux`.\n', - ); - process.exit(1); - } -} - -function tmuxCall(args: string[]): string { - try { - return execFileSync('tmux', args, { stdio: ['ignore', 'pipe', 'pipe'] }).toString('utf-8'); - } catch (err: any) { - const stderr = err?.stderr?.toString?.('utf-8') ?? ''; - throw new Error(`tmux ${args.join(' ')} failed: ${stderr || err.message}`); - } -} - -function tmuxCallSafe(args: string[]): string | null { - try { - return tmuxCall(args); - } catch { - return null; - } -} - -/** - * Create a fresh CLAUDE_CONFIG_DIR tempdir for the scenario. - * Returns `/.claude` — caller seeds state under this path. - */ -export function makeScenarioConfigDir(scenarioName: string): string { - const prefix = join(tmpdir(), `tkm-test-evolve-${scenarioName}-`); - const base = mkdtempSync(prefix); - const configDir = join(base, '.claude'); - mkdirSync(configDir, { recursive: true }); - return configDir; -} - -/** - * Spawn a new tmux session with a single pane. 
The pane inherits - * `opts.envVars` (including `CLAUDE_CONFIG_DIR`). Returns the pane id. - */ -export function spawnPane(opts: SpawnPaneOpts): PaneHandle { - // Kill any pre-existing session with the same name (idempotent) - tmuxCallSafe(['kill-session', '-t', opts.sessionName]); - - // Build env arg list for `new-session`: `-e KEY=VAL` pairs - const envArgs: string[] = []; - for (const [k, v] of Object.entries(opts.envVars)) { - envArgs.push('-e', `${k}=${v}`); - } - - // Create a detached session running the command. If command is empty, spawn a shell. - const args = [ - 'new-session', - '-d', - '-s', - opts.sessionName, - ...(opts.windowName ? ['-n', opts.windowName] : []), - '-c', - opts.cwd, - ...envArgs, - opts.command, - ]; - tmuxCall(args); - - // Resolve pane id — first pane of the session - const paneId = tmuxCall(['list-panes', '-t', opts.sessionName, '-F', '#{pane_id}']).trim().split('\n')[0]; - if (!paneId) { - throw new Error(`spawnPane: no pane id returned for session ${opts.sessionName}`); - } - - return { - sessionName: opts.sessionName, - paneId, - configDir: opts.envVars.CLAUDE_CONFIG_DIR ?? '', - }; -} - -/** - * Capture the pane's plaintext buffer (ANSI stripped by default via `-p` - * without `-e`). Returns the most recent visible buffer. - */ -export function capturePane(paneId: string, opts?: { history?: boolean }): string { - const args = ['capture-pane', '-p', '-t', paneId]; - if (opts?.history) { - // `-S -` means start of history - args.push('-S', '-'); - } - return tmuxCallSafe(args) ?? ''; -} - -/** - * Send keys to the pane. By default appends Enter. For AskUserQuestion - * injection, pass a numeric index (1-4) or literal string for Other. - */ -export function sendKeys(paneId: string, keys: string, opts?: { enter?: boolean }): void { - const sendEnter = opts?.enter !== false; - tmuxCall(['send-keys', '-t', paneId, keys]); - if (sendEnter) { - tmuxCall(['send-keys', '-t', paneId, 'Enter']); - } -} - -/** - * Kill a pane. 
Idempotent — does not throw if the pane is already dead. - */ -export function killPane(paneId: string): void { - tmuxCallSafe(['kill-pane', '-t', paneId]); -} - -/** Kill an entire session (all panes + window). Idempotent. */ -export function killSession(sessionName: string): void { - tmuxCallSafe(['kill-session', '-t', sessionName]); -} - -/** - * Poll `capturePane` at `intervalMs` until `regex` matches the captured - * text or `timeoutMs` elapses. Returns the match or null on timeout. - */ -export async function waitForPattern( - paneId: string, - regex: RegExp, - timeoutMs = 120_000, - intervalMs = 2_000, -): Promise { - const deadline = Date.now() + timeoutMs; - while (Date.now() < deadline) { - const text = capturePane(paneId, { history: true }); - const match = text.match(regex); - if (match) return match; - await sleep(intervalMs); - } - return null; -} - -/** - * Default AskUserQuestion UI detection regex. Matches numbered option prefixes - * rendered by Claude Code's question UI (e.g. ` 1. Vaporeon`, `2. …`). - */ -export const ASK_USER_QUESTION_UI_REGEX = /^\s*[1-4]\.\s/m; - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} - -/** - * Helper: detached spawn with stdio inherited (used for spawning non-tmux - * helper processes). Exported for symmetry; not used in the orchestrator - * directly but available for manual smoke checks. - */ -export function spawnDetached(command: string, args: string[], env: Record): number { - const child = spawn(command, args, { - detached: true, - stdio: 'ignore', - env: { ...process.env, ...env }, - }); - child.unref(); - return child.pid ?? -1; -} diff --git a/src/test-evolve/verify.ts b/src/test-evolve/verify.ts index 86620eb3..b500a1b8 100644 --- a/src/test-evolve/verify.ts +++ b/src/test-evolve/verify.ts @@ -1,14 +1,13 @@ /** - * verify.ts — 3-layer assertion module for test-evolve scenarios. + * verify.ts — State-only assertion for test-evolve scenarios. 
* - * Layer 1 (UI): regex on `tmux capture-pane` plaintext matches - * `scenario.expected_block.reason_contains` - * Layer 2 (Tool): captured pane text contains `tokenmon evolve ` - * Layer 3 (State): post-run state.json/config.json match - * `scenario.expected_after` assertions + * `verifyState(scenario, gen)` reads the live state.json / config.json and + * compares each `expected_after` field. Returns a structured result with + * per-field diffs so the CLI can print PASS/FAIL per field. */ import { existsSync, readFileSync } from 'fs'; import { join } from 'path'; +import { homedir } from 'os'; export interface Scenario { name: string; @@ -26,84 +25,16 @@ export interface Scenario { expected_after: Record; } -export interface LayerResult { - pass: boolean; - detail: string; -} - export interface StateDiffEntry { field: string; expected: unknown; actual: unknown; } -export interface VerifyResult { - scenario: string; +export interface StateVerifyResult { pass: boolean; - layer_ui: LayerResult; - layer_tool: LayerResult; - layer_state: LayerResult & { diffs: StateDiffEntry[] }; -} - -/** - * Layer 1 — UI render assertion. Every `reason_contains` fragment must appear - * (case-insensitive substring) somewhere in the captured text. - */ -export function verifyUI(capturedText: string, scenario: Scenario): LayerResult { - const lowered = capturedText.toLowerCase(); - const missed: string[] = []; - const matched: string[] = []; - for (const frag of scenario.expected_block.reason_contains) { - if (lowered.includes(frag.toLowerCase())) { - matched.push(frag); - } else { - missed.push(frag); - } - } - return { - pass: missed.length === 0, - detail: missed.length === 0 - ? `all ${matched.length} fragments matched` - : `missed: ${missed.join(', ')}`, - }; -} - -/** - * Layer 2 — tool call assertion. Looks for `tokenmon evolve ` - * or `tokenmon evolve ` in the captured text. 
is any party - * member that was evolution-ready in the seed; is `expected_choice` - * when it is a numeric pokemon id. - */ -export function verifyToolCall(capturedText: string, scenario: Scenario): LayerResult { - const readyFrom = Object.keys(scenario.seed.pokemon).filter( - (k) => scenario.seed.pokemon[k]?.evolution_ready, - ); - const choice = scenario.expected_choice; - const isNumericChoice = /^\d+$/.test(choice); - - // If user refused (non-numeric choice like "no"), we don't expect a - // `tokenmon evolve` call — success is absence of the call. - if (!isNumericChoice) { - const absent = !/\btokenmon\s+evolve\s+/.test(capturedText); - return { - pass: absent, - detail: absent ? 'no tokenmon evolve call (as expected for refuse)' : 'unexpected tokenmon evolve call found', - }; - } - - // Look for `tokenmon evolve ` with any of the ready from ids - for (const from of readyFrom) { - const re = new RegExp(`tokenmon\\s+evolve\\s+${from}\\s+${choice}`); - if (re.test(capturedText)) { - return { pass: true, detail: `found: tokenmon evolve ${from} ${choice}` }; - } - } - // Fallback: accept `tokenmon evolve ` (evolve without from id) - const fallback = new RegExp(`tokenmon\\s+evolve\\s+\\d+\\s+${choice}`); - if (fallback.test(capturedText)) { - return { pass: true, detail: `found fallback: tokenmon evolve … ${choice}` }; - } - return { pass: false, detail: `no tokenmon evolve call matching ${readyFrom.join('|')} → ${choice}` }; + detail: string; + diffs: StateDiffEntry[]; } interface ReadableState { @@ -126,29 +57,35 @@ function readJsonSafe(path: string): T | null { } } +function getByPath(obj: any, path: string): unknown { + if (!path) return obj; + let cur = obj; + for (const p of path.split('.')) { + if (cur == null) return undefined; + cur = cur[p]; + } + return cur; +} + +function deepEqualOrNull(actual: unknown, expected: unknown): boolean { + if (expected === null) return actual === undefined || actual === null; + return JSON.stringify(actual) === 
JSON.stringify(expected); +} + /** - * Layer 3 — state diff. Reads state.json and config.json from the scenario's - * tempdir (configDirOverride) or from the live CLAUDE_DIR, then compares each - * `expected_after` field. + * Read live state.json / config.json and compare against scenario.expected_after. * * Supported field forms: - * `pokemon..` — equality (null = field absent) - * `unlocked.includes` — array of ids that MUST be present in state.unlocked - * `unlocked.excludes` — array of ids that MUST NOT be present - * `party.includes` — array of ids that MUST be present in config.party + * `pokemon..` — equality (null = field absent) + * `unlocked.includes` — array of ids that MUST be present in state.unlocked + * `unlocked.excludes` — array of ids that MUST NOT be present + * `party.includes` — array of ids that MUST be present in config.party */ -export function verifyState( - scenario: Scenario, - gen: string, - configDirOverride?: string, -): LayerResult & { diffs: StateDiffEntry[] } { - const base = configDirOverride ?? process.env.CLAUDE_CONFIG_DIR ?? ''; - const tokenmonDir = base ? join(base, 'tokenmon', gen) : ''; - const statePath = tokenmonDir ? join(tokenmonDir, 'state.json') : ''; - const configPath = tokenmonDir ? join(tokenmonDir, 'config.json') : ''; - - const state = readJsonSafe(statePath) ?? {}; - const config = readJsonSafe(configPath) ?? {}; +export function verifyState(scenario: Scenario, gen: string): StateVerifyResult { + const claudeDir = process.env.CLAUDE_CONFIG_DIR ?? join(homedir(), '.claude'); + const tokenmonDir = join(claudeDir, 'tokenmon', gen); + const state = readJsonSafe(join(tokenmonDir, 'state.json')) ?? {}; + const config = readJsonSafe(join(tokenmonDir, 'config.json')) ?? {}; const diffs: StateDiffEntry[] = []; @@ -159,39 +96,31 @@ export function verifyState( const key = parts.slice(2).join('.'); const p = state.pokemon?.[id]; const actual = p ? 
getByPath(p, key) : undefined; - if (!deepEqualOrNull(actual, expected)) { - diffs.push({ field, expected, actual }); - } + if (!deepEqualOrNull(actual, expected)) diffs.push({ field, expected, actual }); } else if (field === 'unlocked.includes') { const arr = Array.isArray(expected) ? expected : []; const unlocked = state.unlocked ?? []; for (const id of arr) { - if (!unlocked.includes(id)) { + if (!unlocked.includes(id)) diffs.push({ field: `unlocked.includes[${id}]`, expected: true, actual: false }); - } } } else if (field === 'unlocked.excludes') { const arr = Array.isArray(expected) ? expected : []; const unlocked = state.unlocked ?? []; for (const id of arr) { - if (unlocked.includes(id)) { + if (unlocked.includes(id)) diffs.push({ field: `unlocked.excludes[${id}]`, expected: false, actual: true }); - } } } else if (field === 'party.includes') { const arr = Array.isArray(expected) ? expected : []; const party = config.party ?? []; for (const id of arr) { - if (!party.includes(id)) { + if (!party.includes(id)) diffs.push({ field: `party.includes[${id}]`, expected: true, actual: false }); - } } } else { - // Generic top-level field compare const actual = (state as any)[field]; - if (!deepEqualOrNull(actual, expected)) { - diffs.push({ field, expected, actual }); - } + if (!deepEqualOrNull(actual, expected)) diffs.push({ field, expected, actual }); } } @@ -201,44 +130,3 @@ export function verifyState( diffs, }; } - -/** Run all 3 layers and aggregate into a VerifyResult. 
*/ -export function verifyScenario( - capturedText: string, - scenario: Scenario, - gen: string, - configDirOverride?: string, -): VerifyResult { - const layer_ui = verifyUI(capturedText, scenario); - const layer_tool = verifyToolCall(capturedText, scenario); - const layer_state = verifyState(scenario, gen, configDirOverride); - return { - scenario: scenario.name, - pass: layer_ui.pass && layer_tool.pass && layer_state.pass, - layer_ui, - layer_tool, - layer_state, - }; -} - -// ── helpers ── - -function getByPath(obj: any, path: string): unknown { - if (!path) return obj; - const parts = path.split('.'); - let cur = obj; - for (const p of parts) { - if (cur == null) return undefined; - cur = cur[p]; - } - return cur; -} - -/** - * Equality with null-coalesced undefined. `null` in `expected` means the field - * should be absent/undefined in actual. - */ -function deepEqualOrNull(actual: unknown, expected: unknown): boolean { - if (expected === null) return actual === undefined || actual === null; - return JSON.stringify(actual) === JSON.stringify(expected); -} From 7b1a558f4c223f2405a1fe5684035e18ffb275e7 Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 15:03:53 +0900 Subject: [PATCH 05/14] fix(test-evolve): auto verify + auto restore after evolution event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous SKILL.md asked the user to manually invoke /tkm:test-evolve --verify and /tkm:test-evolve --restore at the end of each scenario. That contradicted the original design goal — the cycle (seed → test → verify → restore) must always close itself so the user's live state and hooks.json never stay mutated past the test window. Update the skill to orchestrate the full cycle across turns: 1. On /tkm:test-evolve : run --setup, tell the user to send any message, then stop the turn. 2. 
On the next turn, after the Stop hook emits the block reason and the user picks an option (or refuses) via AskUserQuestion, run `tokenmon evolve ` for the chosen target(s). 3. Immediately after the evolve call(s) succeed, run --verify and then --restore, always, even if verify reports FAIL. Restore is unconditional so the user's real state/config/hooks.json come back to pre-test bytes. --restore is still exposed as a manual escape hatch for the case where the session is killed before the auto-restore step runs. --verify is no longer a user-facing command. Co-Authored-By: Claude Opus 4.7 (1M context) --- skills/test-evolve/SKILL.md | 66 ++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/skills/test-evolve/SKILL.md b/skills/test-evolve/SKILL.md index 78ec0a7d..8221fd74 100644 --- a/skills/test-evolve/SKILL.md +++ b/skills/test-evolve/SKILL.md @@ -1,32 +1,66 @@ --- -description: "Dev-only: manual test harness for the evolution AskUserQuestion flow. Backs up state, seeds a scenario party, and lets the user trigger the Stop-hook evolution prompt in their live session." +description: "Dev-only: manual test harness for the evolution AskUserQuestion flow. Backs up state, seeds a scenario party, auto-verifies + auto-restores after the user completes the evolution prompt." --- -Dev-only test harness for the evolution AskUserQuestion flow. No tmux, no spawning — the user triggers the evolution prompt manually in this live session. +Dev-only test harness for the evolution AskUserQuestion flow. No tmux, no spawning — the user triggers the evolution prompt manually in this live session. Verify and restore run automatically; the user only has to pick the scenario and click through the `AskUserQuestion` UI. 
```bash P="${CLAUDE_PLUGIN_ROOT:-$(ls -d ~/.claude/plugins/marketplaces/tkm 2>/dev/null || ls -d ~/.claude/plugins/cache/tkm/tkm/*/ 2>/dev/null | sort -V | tail -1)}" ``` -## Dispatch +## Lifecycle (one scenario, auto verify + restore) -- If `$ARGUMENTS` is `--list` or `--verify` or `--restore` or `--help`: - run `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" ${ARGUMENTS}`, show output, stop. +When `$ARGUMENTS` is a scenario name (not a flag starting with `--`), execute this **multi-turn** protocol. -- Otherwise treat `$ARGUMENTS` as a scenario name and run setup: - `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --setup ${ARGUMENTS}` +### Turn 1 — setup (this turn) - After setup succeeds, tell the user: +1. Run: `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --setup ${ARGUMENTS}` +2. Show the setup output. +3. Remember (carry into the next turn): a test cycle is active for scenario `${ARGUMENTS}`. The CLI wrote `.tokenmon/test-backup/current.json` as a persistent marker — checking for its existence confirms the cycle is still mid-flight. +4. Tell the user, verbatim: - > Party seeded for scenario **${ARGUMENTS}**. Send any short message to trigger the evolution prompt. When done: - > - `/tkm:test-evolve --verify` — check state vs expected_after - > - `/tkm:test-evolve --restore` — restore backup and clean up + > Party seeded for **${ARGUMENTS}**. Send any short message to trigger the Stop-hook evolution prompt. After you click an option (or `Refuse`), I'll auto-verify and auto-restore. + +5. Stop the turn. Do **not** run verify or restore yet. + +### Turn N — user-triggered evolution event + +The user sends a message. The Stop hook emits `{"decision":"block", "reason": ...}` which arrives back as Claude-visible feedback, with instructions to call `AskUserQuestion` for each evolution candidate. Render the question(s) exactly as instructed by the block reason. 
For each user selection: + +- If the user picked a target, run: `tokenmon evolve ` +- If the user refused, do nothing (the `evolution_prompt_shown` flag already gates re-prompting) + +### Turn N+1 — auto verify + auto restore (MANDATORY) + +Immediately after the `tokenmon evolve` call(s) succeed (or the refuse path completes), and **before responding with anything else**, run these two commands in order: + +1. `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --verify` +2. `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --restore` + +Restore **always** runs, even if verify reports FAIL. The user's live state/config/hooks.json are only safe once `--restore` completes. + +Show the combined output to the user as a compact summary: scenario name, verify verdict (PASS / FAIL + failing fields), restore confirmation. + +## Dispatch for flag arguments + +- `--list` → `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --list`, show output, stop. +- `--restore` → same, `--restore`. For emergency cleanup when an earlier cycle did not auto-restore (e.g. the session was killed mid-test). +- `--help` → same, `--help`. +- `--verify` is present in the CLI but should not be invoked directly by users; it is called automatically as part of the lifecycle above. 
## Usage -| Command | Description | -|---------|-------------| -| `/tkm:test-evolve branch-eevee` | Seed Eevee branch-evolution scenario | +| Command | Behavior | +|---------|---------| +| `/tkm:test-evolve branch-eevee` | Setup → user triggers → auto verify + auto restore | | `/tkm:test-evolve --list` | List all 6 scenarios | -| `/tkm:test-evolve --verify` | Compare live state vs expected_after | -| `/tkm:test-evolve --restore` | Restore backup, remove current.json | +| `/tkm:test-evolve --restore` | Emergency restore (only needed if auto-restore was skipped) | + +## Scenarios (see `src/test-scenarios/*.json`) + +- `branch-eevee` — 8-way branch, expect Vaporeon +- `single-charmander` — single-chain, expect Charmeleon +- `multi-3` — 3 pokemon ready, batch in one `AskUserQuestion` +- `overflow-5` — 5 pokemon ready, first 4 this turn, 5th deferred +- `refuse-persist` — user refuses, verify `evolution_prompt_shown` is set +- `accept-clear-reprompt` — accept → flag cleared on the new pokemon key From 7f1ac4a76f7f2cc6a1b32b16256c590b7c10337c Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 15:34:04 +0900 Subject: [PATCH 06/14] fix(stop): emit evolution block on first stop + drop status-line hint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two adjustments so the evolution prompt is always visible through the AskUserQuestion path: 1. Block detection moves BEFORE the first_stop/no_delta early return in stop.ts. The prior placement meant that if evolution_ready was already set when a new session started (e.g. after a cheat/test seed, or a resumed conversation where conditions had been met but the block had never fired), the very first Stop silently returned and the user had to send a second message before AskUserQuestion surfaced. Running block detection regardless of the lock result kind fixes this and keeps the existing `evolution_prompt_shown` guard so duplicate blocks are still prevented. 2. 
Drop the `evolution_ready` hint from the status line. Because the Stop hook now reliably produces an AskUserQuestion prompt on every qualifying stop, rendering the same "pokemon ready to evolve" notice in the status line was redundant noise — the prompt itself is the canonical surface. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/hooks/stop.ts | 28 ++++++++++++++++------------ src/status-line.ts | 13 ++++--------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/hooks/stop.ts b/src/hooks/stop.ts index d6224171..6093160c 100644 --- a/src/hooks/stop.ts +++ b/src/hooks/stop.ts @@ -579,20 +579,14 @@ async function main(): Promise { return; } - if (result.value === 'first_stop' || result.value === 'no_delta') { - playCry(); - console.log(JSON.stringify(output)); - return; - } - - if (messages.length > 0) { - output.system_message = messages.join('\n'); - } - - // ── Evolution block detection (post-lock) ── + // ── Evolution block detection (post-lock, runs regardless of result type) ── // Scan party for pokemon with evolution_ready && !evolution_prompt_shown. // If found, emit decision:"block" with a reason instructing Claude to use - // AskUserQuestion. Flag is set AFTER block emission (Risk 6: duplication > loss). + // AskUserQuestion. Runs BEFORE the first_stop/no_delta early return so the + // prompt fires on the very first turn of a session where evolution is + // already pending (e.g. after a cheat/test seed, or a resumed session + // where evolution conditions were met but the user had not yet been + // prompted). Flag is set AFTER block emission (Risk 6: duplication > loss). 
{ const postConfig = readConfig(gen); const postState = readState(gen); @@ -646,6 +640,16 @@ async function main(): Promise { } } + if (result.value === 'first_stop' || result.value === 'no_delta') { + playCry(); + console.log(JSON.stringify(output)); + return; + } + + if (messages.length > 0) { + output.system_message = messages.join('\n'); + } + playCry(); console.log(JSON.stringify(output)); } diff --git a/src/status-line.ts b/src/status-line.ts index f3e0235f..32ed1f4e 100644 --- a/src/status-line.ts +++ b/src/status-line.ts @@ -596,16 +596,11 @@ function main(): void { print(state.last_drop); } else if (state.last_tip) { print(state.last_tip.text); - } else { - // Show evolution_ready hint for party pokemon with pending branching evolution - for (const pokemonName of config.party) { - const pState = state.pokemon[pokemonName]; - if (pState?.evolution_ready && !pState?.evolution_prompt_shown) { - print(t('statusline.evolution_ready', { pokemon: getPokemonName(pokemonName) })); - break; - } - } } + // Note: evolution_ready no longer shows in the status line. The Stop hook + // emits a decision:"block" with an AskUserQuestion instruction on any stop + // where evolution is pending, so surfacing the same pokemon twice (status + // line + block prompt) is redundant noise. // === Tier preview line (independent, always shown when non-normal) === if (state.pending_tier) { From 72bdd78c1e7aec778b69b7a4557c5000b782982e Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 15:45:41 +0900 Subject: [PATCH 07/14] fix(i18n): verbatim pokemon-voice question + overflow rule for 4+ branches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three polish items surfaced during live testing of the evolution AskUserQuestion flow: 1. Tone mismatch. Claude was paraphrasing the AskUserQuestion question text instead of using the pokemon-voice phrasing ("...어라!? {pokemon}의 상태가...?") that the status line had been using. 
Each locale's hook.evolution_candidate_line now carries the exact per-pokemon question string under a "use VERBATIM" label, and hook.evolution_block_reason instructs Claude to copy that string into AskUserQuestion.question without any rewording. 2. Overflow handling for branches with more than 3 targets (Eevee has 8). AskUserQuestion caps at 4 options, so the previous "all targets + Refuse" instruction silently broke for Eevee-class pokemon. The new reason text spells out the rule: ≤3 targets show all + Refuse, 4+ targets show the first 3 + Refuse and list the remaining targets in the question body so the user can pick any of them via 'Other'. 3. Cross-gen name resolution in getPokemonName. Seed data (and any other path that surfaces a pokemon not native to the active generation's i18n) was falling back to the numeric ID, so the block reason rendered "133" instead of "이브이". Added a cross-gen lookup that searches each generation's game i18n before using the ID as the final fallback; the active generation is still preferred. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/pokemon-data.ts | 20 ++++++++++++++++++-- src/i18n/en.json | 4 ++-- src/i18n/en.pokemon.json | 4 ++-- src/i18n/ko.json | 4 ++-- src/i18n/ko.pokemon.json | 4 ++-- 5 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/core/pokemon-data.ts b/src/core/pokemon-data.ts index e897960b..f27e4712 100644 --- a/src/core/pokemon-data.ts +++ b/src/core/pokemon-data.ts @@ -222,13 +222,29 @@ export function getGameI18n(locale?: string, gen?: string): GameI18nData { return _gameI18n[key]; } +const NAME_LOOKUP_GENS = ['gen1', 'gen2', 'gen3', 'gen4', 'gen5', 'gen6', 'gen7', 'gen8', 'gen9']; + export function getPokemonName(id: string | number, gen?: string, shiny?: boolean): string { const g = gen ?? 
getActiveGeneration(); getPokemonDB(g); const strId = String(id); const baseId = toBaseId(strId); - const i18n = getGameI18n(undefined, g); - const name = i18n.pokemon[baseId] || baseId; + let name = getGameI18n(undefined, g).pokemon[baseId]; + if (!name) { + // Cross-gen fallback: a pokemon may be displayed in an active gen that + // does not natively index it (e.g. seed data, migration, cross-gen refs). + // Search other gens' i18n so we surface a real name instead of the ID. + for (const og of NAME_LOOKUP_GENS) { + if (og === g) continue; + try { + const hit = getGameI18n(undefined, og).pokemon[baseId]; + if (hit) { name = hit; break; } + } catch { + // gen's data not installed — skip silently + } + } + } + if (!name) name = baseId; if (shiny || isShinyKey(strId)) return '★' + name; return name; } diff --git a/src/i18n/en.json b/src/i18n/en.json index 2e275672..24b203b6 100644 --- a/src/i18n/en.json +++ b/src/i18n/en.json @@ -404,8 +404,8 @@ "hook.levelup": "⬆️ {pokemon} Lv.{from} → Lv.{to}! (XP: +{xp})", "hook.evolution": "✨ {pokemon} evolved into {newPokemon}!", "hook.party_join": "🎊 {pokemon} joined the party!", - "hook.evolution_candidate_line": "- {pokemon} can evolve to: {targets} (or refuse)", - "hook.evolution_block_reason": "The following party pokemon are ready to evolve. You MUST call AskUserQuestion to ask the user which evolution they want for each one (one subquestion per pokemon, up to 4 subquestions).\n\n{candidateList}\n\nFor each pokemon, present the evolution targets as options plus a \"Refuse\" option. If the user selects a target, run: `tokenmon evolve `. If the user refuses, do nothing — the pokemon will not be re-prompted automatically.", + "hook.evolution_candidate_line": "- Pokemon: {pokemon}\n Use this question VERBATIM (AskUserQuestion.question, do NOT paraphrase): \"{pokemon} is ready to evolve! 
Choose a form:\"\n All evolution targets: {targets}", + "hook.evolution_block_reason": "Party pokemon are ready to evolve.\n\nYou MUST call AskUserQuestion. One subquestion per pokemon (batch up to 4 pokemon per call).\n\nPer-subquestion rules:\n- `question` field: copy the candidate's 'Use this question VERBATIM' line EXACTLY — do not paraphrase, do not reformat.\n- `options` (buttons, max 4):\n - 3 or fewer targets → all targets + 'Refuse'.\n - 4+ targets → first 3 targets + 'Refuse'. List the remaining targets on a separate line appended to the question body as 'Other forms: A, B, C' so the user can type one via 'Other'.\n- `multiSelect`: false\n\nCandidates:\n{candidateList}\n\nHandling the user's answer:\n- Button or Other with a target name → run `tokenmon evolve `.\n- 'Refuse' button or Other with 'refuse'/'no' → do nothing (no auto re-prompt).", "tier.heated": "The tall grass is rustling intensely... (Next: encounter 1.5x, XP 1.5x)", "tier.intense": "Something seems to be lurking nearby... (Next: encounter 2.5x, XP 2.5x)", diff --git a/src/i18n/en.pokemon.json b/src/i18n/en.pokemon.json index 97538283..73cca9be 100644 --- a/src/i18n/en.pokemon.json +++ b/src/i18n/en.pokemon.json @@ -371,8 +371,8 @@ "hook.levelup": "⬆️ {pokemon} Lv.{from} grew to Lv.{to}! (XP +{xp})", "hook.evolution": "✨ What? {pokemon} evolved into {newPokemon}!", "hook.party_join": "✨ {pokemon} joined the team!", - "hook.evolution_candidate_line": "- What? {pokemon} is ready to evolve into: {targets} (or refuse)", - "hook.evolution_block_reason": "What?! The following party pokemon are ready to evolve! You MUST call AskUserQuestion to ask the Trainer which evolution they want for each one (one subquestion per pokemon, up to 4 subquestions).\n\n{candidateList}\n\nFor each pokemon, present the evolution targets as options plus a \"Refuse\" option. If the Trainer selects a target, run: `tokenmon evolve `. 
If the Trainer refuses, do nothing — that pokemon will not be re-prompted automatically.", + "hook.evolution_candidate_line": "- Pokemon: {pokemon}\n Use this question VERBATIM (AskUserQuestion.question, do NOT paraphrase): \"What? {pokemon} is trying to evolve!\"\n All evolution targets: {targets}", + "hook.evolution_block_reason": "What?! Party pokemon are trying to evolve!\n\nYou MUST call AskUserQuestion. One subquestion per pokemon (batch up to 4 pokemon per call).\n\nPer-subquestion rules:\n- `question` field: copy the candidate's 'Use this question VERBATIM' line EXACTLY — do not paraphrase, do not reformat.\n- `options` (buttons, max 4):\n - 3 or fewer targets → all targets + 'Refuse'.\n - 4+ targets → first 3 targets + 'Refuse'. List the remaining targets on a separate line appended to the question body as 'Other forms: A, B, C' so the Trainer can type one via 'Other'.\n- `multiSelect`: false\n\nCandidates:\n{candidateList}\n\nHandling the Trainer's answer:\n- Button or Other with a target name → run `tokenmon evolve `.\n- 'Refuse' button or Other with 'refuse'/'no' → do nothing (no auto re-prompt).", "tier.heated": "The tall grass is rustling intensely... (Next: encounter 1.5x, XP 1.5x)", "tier.intense": "Something seems to be lurking nearby... (Next: encounter 2.5x, XP 2.5x)", diff --git a/src/i18n/ko.json b/src/i18n/ko.json index 3b7ca9ce..9eb7c158 100644 --- a/src/i18n/ko.json +++ b/src/i18n/ko.json @@ -404,8 +404,8 @@ "hook.levelup": "⬆️ {pokemon} Lv.{from} → Lv.{to}! (XP: +{xp})", "hook.evolution": "✨ {pokemon:이/가} {newPokemon}(으)로 진화했습니다!", "hook.party_join": "🎊 {pokemon:이/가} 파티에 합류했습니다!", - "hook.evolution_candidate_line": "- {pokemon} 진화 가능: {targets} (또는 거부)", - "hook.evolution_block_reason": "다음 파티 포켓몬이 진화할 준비가 되었습니다. 반드시 AskUserQuestion을 호출하여 각 포켓몬에 대해 사용자에게 진화를 물어보세요 (포켓몬당 하나의 subquestion, 최대 4개).\n\n{candidateList}\n\n각 포켓몬마다 진화 대상 옵션들과 \"거부\" 옵션을 함께 제시하세요. 사용자가 대상을 선택하면 `tokenmon evolve `을 실행하세요. 
사용자가 거부하면 아무것도 하지 마세요 — 해당 포켓몬은 자동으로 다시 묻지 않습니다.", + "hook.evolution_candidate_line": "- 포켓몬: {pokemon}\n 그대로 쓸 질문 (AskUserQuestion.question, 한 글자도 바꾸지 말 것): \"{pokemon:이/가} 진화할 준비가 되었어! 어느 폼으로 진화할까?\"\n 진화 대상 전체: {targets}", + "hook.evolution_block_reason": "파티 포켓몬이 진화할 준비가 되었습니다.\n\n반드시 AskUserQuestion을 호출하세요. 포켓몬당 하나의 subquestion (최대 4 포켓몬까지 배치).\n\n각 subquestion 규칙:\n- `question` 필드: 아래 후보 항목의 '그대로 쓸 질문'을 문자 그대로(paraphrase 금지) 복사해서 사용합니다.\n- `options` (버튼, 최대 4개):\n - 진화 대상 3개 이하 → 모든 대상 + '거부'.\n - 진화 대상 4개 이상 → 앞 3개 대상 + '거부'. 나머지 대상은 question 본문 뒤에 별도 줄로 \"다른 폼: A, B, C\" 처럼 나열해 사용자가 'Other'로 이름을 타이핑할 수 있게 합니다.\n- `multiSelect`: false\n\n후보:\n{candidateList}\n\n사용자 응답 처리:\n- 버튼 또는 Other로 대상 이름 선택 → `tokenmon evolve ` 실행.\n- '거부' 또는 Other로 '거부'/'no' → 아무것도 하지 않음 (자동 재질문 없음).", "tier.heated": "풀숲이 크게 흔들리고 있다... (다음 턴 조우율 1.5x, XP 1.5x)", "tier.intense": "주변에 수상한 기운이 감돌고 있다... (다음 턴 조우율 2.5x, XP 2.5x)", diff --git a/src/i18n/ko.pokemon.json b/src/i18n/ko.pokemon.json index dcc6f78f..160e6427 100644 --- a/src/i18n/ko.pokemon.json +++ b/src/i18n/ko.pokemon.json @@ -371,8 +371,8 @@ "hook.levelup": "⬆️ {pokemon}은(는) Lv.{from}에서 레벨 {to}이(가) 되었다! (XP +{xp})", "hook.evolution": "✨ ...어라!? {pokemon:이/가} {newPokemon}(으)로 진화했다!", "hook.party_join": "✨ {pokemon:이/가} 동료가 되었다!", - "hook.evolution_candidate_line": "- 어라!? {pokemon:이/가} 진화할 준비가 되었다: {targets} (또는 거부)", - "hook.evolution_block_reason": "...어라!? 다음 파티 포켓몬이 진화할 준비가 되었다! 반드시 AskUserQuestion을 호출해서 트레이너에게 각 포켓몬의 진화를 물어봐야 한다 (포켓몬당 하나의 subquestion, 최대 4개).\n\n{candidateList}\n\n각 포켓몬마다 진화 대상 옵션들과 \"거부\" 옵션을 함께 제시해야 한다. 트레이너가 대상을 선택하면 `tokenmon evolve `을 실행한다. 트레이너가 거부하면 아무것도 하지 않는다 — 그 포켓몬은 자동으로 다시 묻지 않는다.", + "hook.evolution_candidate_line": "- 포켓몬: {pokemon}\n 그대로 쓸 질문 (AskUserQuestion.question 필드, 한 글자도 바꾸지 말 것): \"...어라!? {pokemon}의 상태가...?\"\n 진화 대상 전체: {targets}", + "hook.evolution_block_reason": "...어라!? 파티 포켓몬이 진화할 준비가 되었다!\n\n반드시 AskUserQuestion을 호출해야 한다. 
포켓몬당 하나의 subquestion(최대 4 포켓몬까지 배치).\n\n각 subquestion 규칙:\n- `question` 필드: 아래 후보 항목의 '그대로 쓸 질문'을 문자 그대로(paraphrase 금지) 복사해서 사용한다.\n- `options` (버튼, 최대 4개):\n - 진화 대상 3개 이하 → 모든 대상을 버튼으로 + '거부' 버튼.\n - 진화 대상 4개 이상 → 앞 3개 대상 + '거부' 버튼. 나머지 대상은 question 본문 뒤에 별도 줄로 \"다른 폼: A, B, C\" 처럼 나열해 트레이너가 'Other'로 이름을 타이핑할 수 있게 한다.\n- `multiSelect`: false\n\n후보:\n{candidateList}\n\n트레이너 응답 처리:\n- 버튼 또는 Other로 대상 이름 선택 → `tokenmon evolve ` 실행.\n- '거부' 선택 또는 Other로 '거부'/'no' → 아무것도 하지 않는다 (자동으로 다시 묻지 않음).", "tier.heated": "풀숲이 크게 흔들리고 있다... (다음 턴 조우율 1.5x, XP 1.5x)", "tier.intense": "주변에 수상한 기운이 감돌고 있다... (다음 턴 조우율 2.5x, XP 2.5x)", From c4d52c06730878b01ba86f21e5ae1097b7a38bb1 Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 15:49:29 +0900 Subject: [PATCH 08/14] fix(i18n): validate Other-input targets before evolve, re-prompt on mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without an explicit rule, Claude could blindly feed a user's freeform 'Other' response into `tokenmon evolve`. A garbled or off-list name then errored silently inside applyBranchEvolution (not in the evolves_to list -> null return) with no useful feedback to the user. Extend hook.evolution_block_reason in all four locale + voice combos with an explicit handling rule: - Button picks run `tokenmon evolve` directly. - 'Refuse' (button or text: refuse/no/cancel/거부) skips. - Free-text 'Other' must be validated against the target list (case-insensitive, English and localized names both accepted) before the command runs. On mismatch, reply with a short "I didn't recognize that" and re-invoke the same AskUserQuestion. Re-prompt caps at 2 iterations so a user who keeps typing garbage eventually lands on an implicit 'Refuse' instead of an infinite loop. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/i18n/en.json | 2 +- src/i18n/en.pokemon.json | 2 +- src/i18n/ko.json | 2 +- src/i18n/ko.pokemon.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/i18n/en.json b/src/i18n/en.json index 24b203b6..381517e5 100644 --- a/src/i18n/en.json +++ b/src/i18n/en.json @@ -405,7 +405,7 @@ "hook.evolution": "✨ {pokemon} evolved into {newPokemon}!", "hook.party_join": "🎊 {pokemon} joined the party!", "hook.evolution_candidate_line": "- Pokemon: {pokemon}\n Use this question VERBATIM (AskUserQuestion.question, do NOT paraphrase): \"{pokemon} is ready to evolve! Choose a form:\"\n All evolution targets: {targets}", - "hook.evolution_block_reason": "Party pokemon are ready to evolve.\n\nYou MUST call AskUserQuestion. One subquestion per pokemon (batch up to 4 pokemon per call).\n\nPer-subquestion rules:\n- `question` field: copy the candidate's 'Use this question VERBATIM' line EXACTLY — do not paraphrase, do not reformat.\n- `options` (buttons, max 4):\n - 3 or fewer targets → all targets + 'Refuse'.\n - 4+ targets → first 3 targets + 'Refuse'. List the remaining targets on a separate line appended to the question body as 'Other forms: A, B, C' so the user can type one via 'Other'.\n- `multiSelect`: false\n\nCandidates:\n{candidateList}\n\nHandling the user's answer:\n- Button or Other with a target name → run `tokenmon evolve `.\n- 'Refuse' button or Other with 'refuse'/'no' → do nothing (no auto re-prompt).", + "hook.evolution_block_reason": "Party pokemon are ready to evolve.\n\nYou MUST call AskUserQuestion. One subquestion per pokemon (batch up to 4 pokemon per call).\n\nPer-subquestion rules:\n- `question` field: copy the candidate's 'Use this question VERBATIM' line EXACTLY — do not paraphrase, do not reformat.\n- `options` (buttons, max 4):\n - 3 or fewer targets → all targets + 'Refuse'.\n - 4+ targets → first 3 targets + 'Refuse'. 
List the remaining targets on a separate line appended to the question body as 'Other forms: A, B, C' so the user can type one via 'Other'.\n- `multiSelect`: false\n\nCandidates:\n{candidateList}\n\nHandling the user's answer:\n- Button selecting a target → run `tokenmon evolve `.\n- 'Refuse' button or Other with 'refuse'/'no'/'cancel' → do nothing (no auto re-prompt).\n- For free-text 'Other' input, VALIDATE before running `tokenmon evolve`:\n - If the input (case-insensitive, accepting localized or English names) matches a pokemon name in the 'All evolution targets' list, evolve to that target.\n - If it doesn't match, reply briefly with something like \"I didn't recognize that — please pick a button or type one of: {targets}, refuse\" and re-invoke the same AskUserQuestion (cap at 2 re-prompts to avoid loops; after that, treat as 'Refuse').", "tier.heated": "The tall grass is rustling intensely... (Next: encounter 1.5x, XP 1.5x)", "tier.intense": "Something seems to be lurking nearby... (Next: encounter 2.5x, XP 2.5x)", diff --git a/src/i18n/en.pokemon.json b/src/i18n/en.pokemon.json index 73cca9be..d0452aec 100644 --- a/src/i18n/en.pokemon.json +++ b/src/i18n/en.pokemon.json @@ -372,7 +372,7 @@ "hook.evolution": "✨ What? {pokemon} evolved into {newPokemon}!", "hook.party_join": "✨ {pokemon} joined the team!", "hook.evolution_candidate_line": "- Pokemon: {pokemon}\n Use this question VERBATIM (AskUserQuestion.question, do NOT paraphrase): \"What? {pokemon} is trying to evolve!\"\n All evolution targets: {targets}", - "hook.evolution_block_reason": "What?! Party pokemon are trying to evolve!\n\nYou MUST call AskUserQuestion. One subquestion per pokemon (batch up to 4 pokemon per call).\n\nPer-subquestion rules:\n- `question` field: copy the candidate's 'Use this question VERBATIM' line EXACTLY — do not paraphrase, do not reformat.\n- `options` (buttons, max 4):\n - 3 or fewer targets → all targets + 'Refuse'.\n - 4+ targets → first 3 targets + 'Refuse'. 
List the remaining targets on a separate line appended to the question body as 'Other forms: A, B, C' so the Trainer can type one via 'Other'.\n- `multiSelect`: false\n\nCandidates:\n{candidateList}\n\nHandling the Trainer's answer:\n- Button or Other with a target name → run `tokenmon evolve `.\n- 'Refuse' button or Other with 'refuse'/'no' → do nothing (no auto re-prompt).", + "hook.evolution_block_reason": "What?! Party pokemon are trying to evolve!\n\nYou MUST call AskUserQuestion. One subquestion per pokemon (batch up to 4 pokemon per call).\n\nPer-subquestion rules:\n- `question` field: copy the candidate's 'Use this question VERBATIM' line EXACTLY — do not paraphrase, do not reformat.\n- `options` (buttons, max 4):\n - 3 or fewer targets → all targets + 'Refuse'.\n - 4+ targets → first 3 targets + 'Refuse'. List the remaining targets on a separate line appended to the question body as 'Other forms: A, B, C' so the Trainer can type one via 'Other'.\n- `multiSelect`: false\n\nCandidates:\n{candidateList}\n\nHandling the Trainer's answer:\n- Button selecting a target → run `tokenmon evolve `.\n- 'Refuse' button or Other with 'refuse'/'no'/'cancel' → do nothing (no auto re-prompt).\n- For free-text 'Other' input, VALIDATE before running `tokenmon evolve`:\n - If the input (case-insensitive, accepting localized or English names) matches a pokemon name in the 'All evolution targets' list, evolve to that target.\n - If it doesn't match, reply with something like \"I didn't recognize that — please pick a button or type one of: {targets}, refuse\" and re-invoke the same AskUserQuestion (cap at 2 re-prompts to avoid loops; after that, treat as 'Refuse').", "tier.heated": "The tall grass is rustling intensely... (Next: encounter 1.5x, XP 1.5x)", "tier.intense": "Something seems to be lurking nearby... 
(Next: encounter 2.5x, XP 2.5x)", diff --git a/src/i18n/ko.json b/src/i18n/ko.json index 9eb7c158..076579d5 100644 --- a/src/i18n/ko.json +++ b/src/i18n/ko.json @@ -405,7 +405,7 @@ "hook.evolution": "✨ {pokemon:이/가} {newPokemon}(으)로 진화했습니다!", "hook.party_join": "🎊 {pokemon:이/가} 파티에 합류했습니다!", "hook.evolution_candidate_line": "- 포켓몬: {pokemon}\n 그대로 쓸 질문 (AskUserQuestion.question, 한 글자도 바꾸지 말 것): \"{pokemon:이/가} 진화할 준비가 되었어! 어느 폼으로 진화할까?\"\n 진화 대상 전체: {targets}", - "hook.evolution_block_reason": "파티 포켓몬이 진화할 준비가 되었습니다.\n\n반드시 AskUserQuestion을 호출하세요. 포켓몬당 하나의 subquestion (최대 4 포켓몬까지 배치).\n\n각 subquestion 규칙:\n- `question` 필드: 아래 후보 항목의 '그대로 쓸 질문'을 문자 그대로(paraphrase 금지) 복사해서 사용합니다.\n- `options` (버튼, 최대 4개):\n - 진화 대상 3개 이하 → 모든 대상 + '거부'.\n - 진화 대상 4개 이상 → 앞 3개 대상 + '거부'. 나머지 대상은 question 본문 뒤에 별도 줄로 \"다른 폼: A, B, C\" 처럼 나열해 사용자가 'Other'로 이름을 타이핑할 수 있게 합니다.\n- `multiSelect`: false\n\n후보:\n{candidateList}\n\n사용자 응답 처리:\n- 버튼 또는 Other로 대상 이름 선택 → `tokenmon evolve ` 실행.\n- '거부' 또는 Other로 '거부'/'no' → 아무것도 하지 않음 (자동 재질문 없음).", + "hook.evolution_block_reason": "파티 포켓몬이 진화할 준비가 되었습니다.\n\n반드시 AskUserQuestion을 호출하세요. 포켓몬당 하나의 subquestion (최대 4 포켓몬까지 배치).\n\n각 subquestion 규칙:\n- `question` 필드: 아래 후보 항목의 '그대로 쓸 질문'을 문자 그대로(paraphrase 금지) 복사해서 사용합니다.\n- `options` (버튼, 최대 4개):\n - 진화 대상 3개 이하 → 모든 대상 + '거부'.\n - 진화 대상 4개 이상 → 앞 3개 대상 + '거부'. 나머지 대상은 question 본문 뒤에 별도 줄로 \"다른 폼: A, B, C\" 처럼 나열해 사용자가 'Other'로 이름을 타이핑할 수 있게 합니다.\n- `multiSelect`: false\n\n후보:\n{candidateList}\n\n사용자 응답 처리:\n- 버튼으로 대상 선택 → `tokenmon evolve ` 실행.\n- '거부' 버튼 또는 Other에 '거부'/'no'/'cancel' → 아무것도 하지 않음 (자동 재질문 없음).\n- Other 입력의 경우 `tokenmon evolve` 실행 전에 반드시 validate:\n - 입력 텍스트(대소문자 무시, 한/영 이름 모두 허용)가 위 '진화 대상 전체' 목록의 포켓몬 이름과 일치하면 evolve 실행.\n - 일치하지 않으면 \"인식되지 않은 선택입니다. 버튼을 누르거나 다음 중 하나를 입력해 주세요: {targets}, 거부\"처럼 짧게 안내하고 동일한 AskUserQuestion을 다시 호출 (무한루프 방지: 최대 2회 재질문, 그 후에는 '거부'로 간주).", "tier.heated": "풀숲이 크게 흔들리고 있다... (다음 턴 조우율 1.5x, XP 1.5x)", "tier.intense": "주변에 수상한 기운이 감돌고 있다... 
(다음 턴 조우율 2.5x, XP 2.5x)", diff --git a/src/i18n/ko.pokemon.json b/src/i18n/ko.pokemon.json index 160e6427..b95b94e9 100644 --- a/src/i18n/ko.pokemon.json +++ b/src/i18n/ko.pokemon.json @@ -372,7 +372,7 @@ "hook.evolution": "✨ ...어라!? {pokemon:이/가} {newPokemon}(으)로 진화했다!", "hook.party_join": "✨ {pokemon:이/가} 동료가 되었다!", "hook.evolution_candidate_line": "- 포켓몬: {pokemon}\n 그대로 쓸 질문 (AskUserQuestion.question 필드, 한 글자도 바꾸지 말 것): \"...어라!? {pokemon}의 상태가...?\"\n 진화 대상 전체: {targets}", - "hook.evolution_block_reason": "...어라!? 파티 포켓몬이 진화할 준비가 되었다!\n\n반드시 AskUserQuestion을 호출해야 한다. 포켓몬당 하나의 subquestion(최대 4 포켓몬까지 배치).\n\n각 subquestion 규칙:\n- `question` 필드: 아래 후보 항목의 '그대로 쓸 질문'을 문자 그대로(paraphrase 금지) 복사해서 사용한다.\n- `options` (버튼, 최대 4개):\n - 진화 대상 3개 이하 → 모든 대상을 버튼으로 + '거부' 버튼.\n - 진화 대상 4개 이상 → 앞 3개 대상 + '거부' 버튼. 나머지 대상은 question 본문 뒤에 별도 줄로 \"다른 폼: A, B, C\" 처럼 나열해 트레이너가 'Other'로 이름을 타이핑할 수 있게 한다.\n- `multiSelect`: false\n\n후보:\n{candidateList}\n\n트레이너 응답 처리:\n- 버튼 또는 Other로 대상 이름 선택 → `tokenmon evolve ` 실행.\n- '거부' 선택 또는 Other로 '거부'/'no' → 아무것도 하지 않는다 (자동으로 다시 묻지 않음).", + "hook.evolution_block_reason": "...어라!? 파티 포켓몬이 진화할 준비가 되었다!\n\n반드시 AskUserQuestion을 호출해야 한다. 포켓몬당 하나의 subquestion(최대 4 포켓몬까지 배치).\n\n각 subquestion 규칙:\n- `question` 필드: 아래 후보 항목의 '그대로 쓸 질문'을 문자 그대로(paraphrase 금지) 복사해서 사용한다.\n- `options` (버튼, 최대 4개):\n - 진화 대상 3개 이하 → 모든 대상을 버튼으로 + '거부' 버튼.\n - 진화 대상 4개 이상 → 앞 3개 대상 + '거부' 버튼. 나머지 대상은 question 본문 뒤에 별도 줄로 \"다른 폼: A, B, C\" 처럼 나열해 트레이너가 'Other'로 이름을 타이핑할 수 있게 한다.\n- `multiSelect`: false\n\n후보:\n{candidateList}\n\n트레이너 응답 처리:\n- 버튼으로 대상 선택 → `tokenmon evolve ` 실행.\n- '거부' 버튼 또는 Other에 '거부'/'no'/'cancel' → 아무것도 하지 않는다 (자동으로 다시 묻지 않음).\n- Other 입력의 경우 `tokenmon evolve`를 실행하기 전에 반드시 validate한다:\n - 입력 텍스트(대소문자 무시, 한/영 이름 모두 허용)가 위 '진화 대상 전체' 목록의 포켓몬 이름과 일치하면 해당 대상으로 evolve.\n - 일치하지 않으면 \"인식할 수 없는 답이야. 버튼을 누르거나 다음 중 하나를 입력해: {targets}, 거부\"와 같이 짧게 안내하고 동일한 AskUserQuestion을 다시 호출한다 (무한루프 방지를 위해 최대 2회까지만 재질문, 그 뒤에는 '거부'로 간주).", "tier.heated": "풀숲이 크게 흔들리고 있다... 
(다음 턴 조우율 1.5x, XP 1.5x)", "tier.intense": "주변에 수상한 기운이 감돌고 있다... (다음 턴 조우율 2.5x, XP 2.5x)", From 8893c4b3006a44f772f4c2b1af02e2cdb2fde2b2 Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 16:06:52 +0900 Subject: [PATCH 09/14] fix(evolve+test-evolve): cross-gen name lookup, resolve target, seed items MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three live-test fallouts: 1. Cross-gen reverse name lookup in pokemonIdByName. Previously searched only the active generation's i18n, so "이브이" in a gen4-active save returned undefined and the evolve CLI said "컬렉션에서 "이브이"을(를) 찾을 수 없다". Mirror the forward lookup from getPokemonName: try the active gen first, then fall back across installed gens' ko/en tables. Real-gameplay benefit too — any path that accepts user-typed names for pokemon from other gens now resolves. 2. cmdEvolve's targetArg was passed through to the branch.name string-equal comparison without being resolved through resolvePokemonArg. That forced Claude (or the user) to pass numeric IDs; a localized name like "샤미드" always fell through to "현재 ~의 진화 조건을 만족하는 경로가 없다" even when the target was actually eligible. Apply the same name→ID resolution we already use on pokemonArg. 3. The branch-eevee test scenario seeded `evolution_options` with all 8 Eeveelutions but never seeded the evolution conditions, so applyBranchEvolution's runtime check rejected every choice. Add `items: { water-stone, thunder-stone, fire-stone }` to the scenario seed (and an optional `items`/`current_region` pair on the Scenario type + writeSeed) so the chosen target actually evolves, and trim evolution_options to the 5 branches that are genuinely eligible (stones + friendship) for clearer overflow testing. All 1203 existing tests still green; harness CLI typechecks clean. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/cli/test-evolve.ts | 24 ++++++++++++++++++++++-- src/cli/tokenmon.ts | 4 ++++ src/core/pokemon-data.ts | 18 ++++++++++++++---- src/test-evolve/verify.ts | 4 ++++ src/test-scenarios/branch-eevee.json | 13 +++++++++---- 5 files changed, 53 insertions(+), 10 deletions(-) diff --git a/src/cli/test-evolve.ts b/src/cli/test-evolve.ts index 9c52d323..57638d28 100644 --- a/src/cli/test-evolve.ts +++ b/src/cli/test-evolve.ts @@ -54,8 +54,28 @@ function writeSeed(gen: string, scenario: Scenario, backupDir: string): void { const baseConfig: Record = existsSync(cBackup) ? JSON.parse(readFileSync(cBackup, 'utf-8')) : {}; - writeFileSync(sfile, JSON.stringify({ ...baseState, pokemon: scenario.seed.pokemon, unlocked: scenario.seed.unlocked }, null, 2), 'utf-8'); - writeFileSync(cfile, JSON.stringify({ ...baseConfig, party: scenario.seed.party, starter_chosen: true }, null, 2), 'utf-8'); + // Merge scenario items on top of backed-up state.items so the evolve CLI's + // condition check sees the needed stones/held-items. Backed-up values + // survive only for keys the scenario did not set. + const mergedItems = { + ...(baseState.items as Record | undefined ?? {}), + ...(scenario.seed.items ?? 
{}), + }; + + writeFileSync(sfile, JSON.stringify({ + ...baseState, + pokemon: scenario.seed.pokemon, + unlocked: scenario.seed.unlocked, + items: mergedItems, + }, null, 2), 'utf-8'); + + const configOverlay: Record = { + ...baseConfig, + party: scenario.seed.party, + starter_chosen: true, + }; + if (scenario.seed.current_region) configOverlay.current_region = scenario.seed.current_region; + writeFileSync(cfile, JSON.stringify(configOverlay, null, 2), 'utf-8'); } // ── Subcommands ── diff --git a/src/cli/tokenmon.ts b/src/cli/tokenmon.ts index d48bcce2..b3c7b51e 100644 --- a/src/cli/tokenmon.ts +++ b/src/cli/tokenmon.ts @@ -945,6 +945,10 @@ function cmdCheat(subcmd: string, arg1?: string, arg2?: string): void { function cmdEvolve(pokemonArg?: string, targetArg?: string): void { if (pokemonArg) pokemonArg = resolvePokemonArg(pokemonArg); + // Also resolve the target so the user (or Claude) may pass a localized name + // (e.g., "샤미드") instead of the numeric ID — branch.name values in the + // database are IDs, so the downstream string-equal comparison needs the ID. + if (targetArg) targetArg = resolvePokemonArg(targetArg); const config = readConfig(); const state = readState(); const pokemonDB = getPokemonDB(); diff --git a/src/core/pokemon-data.ts b/src/core/pokemon-data.ts index f27e4712..a6302ab4 100644 --- a/src/core/pokemon-data.ts +++ b/src/core/pokemon-data.ts @@ -362,10 +362,20 @@ export function pokemonIdByName(name: string, gen?: string): string | undefined } } - for (const locale of ['ko', 'en']) { - const i18n = getGameI18n(locale, gen); - for (const [id, pokeName] of Object.entries(i18n.pokemon)) { - if (pokeName === name) return id; + // Active generation first, then cross-gen fallback so a localized name + // from another generation's dex (e.g. "이브이" in a gen4-active save) + // still resolves. + const gensToSearch = [gen ?? getActiveGeneration(), ...NAME_LOOKUP_GENS.filter(g => g !== (gen ?? 
getActiveGeneration()))]; + for (const g of gensToSearch) { + for (const locale of ['ko', 'en']) { + try { + const i18n = getGameI18n(locale, g); + for (const [id, pokeName] of Object.entries(i18n.pokemon)) { + if (pokeName === name) return id; + } + } catch { + // Skip gens with no installed data + } } } return undefined; diff --git a/src/test-evolve/verify.ts b/src/test-evolve/verify.ts index b500a1b8..407dd5a8 100644 --- a/src/test-evolve/verify.ts +++ b/src/test-evolve/verify.ts @@ -16,6 +16,10 @@ export interface Scenario { party: string[]; pokemon: Record; unlocked: string[]; + /** Optional bag items keyed by canonical item id (e.g. "water-stone"). */ + items?: Record; + /** Optional current_region override on config (e.g. "4") for location-based evolutions. */ + current_region?: string; }; expected_block: { decision: string; diff --git a/src/test-scenarios/branch-eevee.json b/src/test-scenarios/branch-eevee.json index e6879827..cf466ab6 100644 --- a/src/test-scenarios/branch-eevee.json +++ b/src/test-scenarios/branch-eevee.json @@ -1,6 +1,6 @@ { "name": "branch-eevee", - "description": "Eevee (#133) branch evolution — user picks Vaporeon (#134) from 8 options", + "description": "Eevee (#133) branch evolution — 5 eligible branches seeded (water-stone, thunder-stone, fire-stone, friendship). 
Tests 4+ overflow: 3 targets as buttons + Refuse, remaining listed in question body.", "seed": { "party": ["133"], "pokemon": { @@ -10,15 +10,20 @@ "xp": 15625, "friendship": 220, "evolution_ready": true, - "evolution_options": ["134", "135", "136", "196", "197", "470", "471", "700"], + "evolution_options": ["134", "135", "136", "196", "197"], "met": "starter" } }, - "unlocked": ["133"] + "unlocked": ["133"], + "items": { + "water-stone": 1, + "thunder-stone": 1, + "fire-stone": 1 + } }, "expected_block": { "decision": "block", - "reason_contains": ["Eevee", "AskUserQuestion", "tokenmon evolve"] + "reason_contains": ["이브이", "Eevee", "AskUserQuestion", "tokenmon evolve"] }, "expected_choice": "134", "expected_after": { From 6982d7e7689f5e0625f7d98cf909453191a5bddf Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 16:34:21 +0900 Subject: [PATCH 10/14] fix(test-evolve): restore 8-branch Eevee + force same-turn completion in skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. branch-eevee now seeds the full 8-branch evolution_options again (134/135/136/196/197/470/471/700). Only 3 are eligible via the seeded stones + 2 via friendship, but all 8 belong to the real Eevee dex so limiting the seed to 5 was misleading — users expected the complete set in the AskUserQuestion prompt. The ineligible three (Leafeon/Glaceon/Sylveon) naturally end up in the question body's "Other forms" list because the 4-option cap rule only promotes eligible targets to buttons. 2. Rewrite skills/test-evolve/SKILL.md so the evolution-trigger turn is treated as one continuous cycle instead of a turn-N / turn-N+1 split. The previous wording let Claude stop after the user answered the AskUserQuestion without running `tokenmon evolve`, verify, or restore. 
The new instructions say explicitly that the entire chain (render question → run evolve → print summary → --verify → --restore → final report) must complete within the same turn without stopping early. Co-Authored-By: Claude Opus 4.7 (1M context) --- skills/test-evolve/SKILL.md | 56 ++++++++++++++-------------- src/test-scenarios/branch-eevee.json | 4 +- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/skills/test-evolve/SKILL.md b/skills/test-evolve/SKILL.md index 8221fd74..7fce0885 100644 --- a/skills/test-evolve/SKILL.md +++ b/skills/test-evolve/SKILL.md @@ -2,51 +2,49 @@ description: "Dev-only: manual test harness for the evolution AskUserQuestion flow. Backs up state, seeds a scenario party, auto-verifies + auto-restores after the user completes the evolution prompt." --- -Dev-only test harness for the evolution AskUserQuestion flow. No tmux, no spawning — the user triggers the evolution prompt manually in this live session. Verify and restore run automatically; the user only has to pick the scenario and click through the `AskUserQuestion` UI. +Dev-only test harness for the evolution AskUserQuestion flow. No tmux, no spawning — the user triggers the evolution prompt manually in this live session. Verify and restore run automatically once the evolution cycle completes; the user only has to pick the scenario and click through the `AskUserQuestion` UI. ```bash P="${CLAUDE_PLUGIN_ROOT:-$(ls -d ~/.claude/plugins/marketplaces/tkm 2>/dev/null || ls -d ~/.claude/plugins/cache/tkm/tkm/*/ 2>/dev/null | sort -V | tail -1)}" ``` -## Lifecycle (one scenario, auto verify + restore) +## Dispatch for flag arguments + +- `--list` → `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --list`, show output, stop. +- `--restore` → same, `--restore`. Emergency cleanup when an earlier cycle did not auto-restore (e.g. the session was killed mid-test). +- `--help` → same, `--help`. 
+- `--verify` is present in the CLI but should not be invoked directly by users; it is called automatically as part of the lifecycle below. -When `$ARGUMENTS` is a scenario name (not a flag starting with `--`), execute this **multi-turn** protocol. +## Scenario lifecycle (when `$ARGUMENTS` is a scenario name) -### Turn 1 — setup (this turn) +### Step 1 — setup (this turn) 1. Run: `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --setup ${ARGUMENTS}` -2. Show the setup output. -3. Remember (carry into the next turn): a test cycle is active for scenario `${ARGUMENTS}`. The CLI wrote `.tokenmon/test-backup/current.json` as a persistent marker — checking for its existence confirms the cycle is still mid-flight. -4. Tell the user, verbatim: +2. Show the setup output verbatim. +3. Tell the user, verbatim: > Party seeded for **${ARGUMENTS}**. Send any short message to trigger the Stop-hook evolution prompt. After you click an option (or `Refuse`), I'll auto-verify and auto-restore. -5. Stop the turn. Do **not** run verify or restore yet. - -### Turn N — user-triggered evolution event - -The user sends a message. The Stop hook emits `{"decision":"block", "reason": ...}` which arrives back as Claude-visible feedback, with instructions to call `AskUserQuestion` for each evolution candidate. Render the question(s) exactly as instructed by the block reason. For each user selection: +4. Stop the turn here. Do **not** pre-run verify or restore. -- If the user picked a target, run: `tokenmon evolve ` -- If the user refused, do nothing (the `evolution_prompt_shown` flag already gates re-prompting) +### Step 2 — user-triggered evolution turn -### Turn N+1 — auto verify + auto restore (MANDATORY) +When the user sends any message after setup, the Stop hook emits `{"decision":"block", "reason": ...}` and Claude Code feeds the block's `reason` field back as the next turn's instruction. 
**Within that same turn you MUST complete the entire cycle below without stopping early** — do not wait for another user turn between steps, do not acknowledge and stop before the cycle finishes. -Immediately after the `tokenmon evolve` call(s) succeed (or the refuse path completes), and **before responding with anything else**, run these two commands in order: +**In order, without pausing:** -1. `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --verify` -2. `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --restore` +1. Render the evolution prompt. Call `AskUserQuestion` exactly as the block reason directs (one subquestion per pokemon; question text copied verbatim; up to 4 buttons with the 3-eligible-plus-Refuse rule; remaining targets listed inline). +2. When the user answers: + - **Button picking a target** → resolve the button label to its target id if needed, then run `"$P/bin/tsx-resolve.sh" "$P/src/cli/tokenmon.ts" evolve ` in a single Bash call. + - **`Refuse` button** or **Other containing `refuse`/`no`/`cancel`/`거부`** → skip the evolve call. + - **Other containing a pokemon name** → validate it against the candidate's `All evolution targets` list; if it matches, run evolve with the resolved target; if it does not match, reply with a short "I didn't recognize that" and re-invoke the same `AskUserQuestion` (max two re-prompts, then treat as Refuse). +3. After the evolve call returns (or after the refuse path settles), print a one-line summary to the user that names the pokemon and its new form (or says "refused") so they see that their pick took effect. +4. **Immediately**, in the same turn, run: + 1. `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --verify` + 2. `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --restore` +5. Show a compact final report: scenario name, user's pick, verify verdict (PASS / FAIL with any failing fields), and restore confirmation. -Restore **always** runs, even if verify reports FAIL. 
The user's live state/config/hooks.json are only safe once `--restore` completes. - -Show the combined output to the user as a compact summary: scenario name, verify verdict (PASS / FAIL + failing fields), restore confirmation. - -## Dispatch for flag arguments - -- `--list` → `"$P/bin/tsx-resolve.sh" "$P/src/cli/test-evolve.ts" --list`, show output, stop. -- `--restore` → same, `--restore`. For emergency cleanup when an earlier cycle did not auto-restore (e.g. the session was killed mid-test). -- `--help` → same, `--help`. -- `--verify` is present in the CLI but should not be invoked directly by users; it is called automatically as part of the lifecycle above. +**Critical:** `--restore` must run even when `--verify` reports FAIL. The user's real state/config/hooks.json only become safe again after the restore completes. ## Usage @@ -58,7 +56,7 @@ Show the combined output to the user as a compact summary: scenario name, verify ## Scenarios (see `src/test-scenarios/*.json`) -- `branch-eevee` — 8-way branch, expect Vaporeon +- `branch-eevee` — full 8-way branch, 3 eligible via stones + 2 via friendship; exercises the overflow rule - `single-charmander` — single-chain, expect Charmeleon - `multi-3` — 3 pokemon ready, batch in one `AskUserQuestion` - `overflow-5` — 5 pokemon ready, first 4 this turn, 5th deferred diff --git a/src/test-scenarios/branch-eevee.json b/src/test-scenarios/branch-eevee.json index cf466ab6..cd473102 100644 --- a/src/test-scenarios/branch-eevee.json +++ b/src/test-scenarios/branch-eevee.json @@ -1,6 +1,6 @@ { "name": "branch-eevee", - "description": "Eevee (#133) branch evolution — 5 eligible branches seeded (water-stone, thunder-stone, fire-stone, friendship). Tests 4+ overflow: 3 targets as buttons + Refuse, remaining listed in question body.", + "description": "Eevee (#133) full 8-way branch evolution. 
3 eligible via seeded stones (water/thunder/fire), 2 via friendship (Espeon/Umbreon), 3 ineligible without extra context (Leafeon location, Glaceon location, Sylveon fairy-move). Exercises the 4+ overflow rule: first 3 eligible as buttons + Refuse, remaining listed as 'Other forms' in question body.", "seed": { "party": ["133"], "pokemon": { @@ -10,7 +10,7 @@ "xp": 15625, "friendship": 220, "evolution_ready": true, - "evolution_options": ["134", "135", "136", "196", "197"], + "evolution_options": ["134", "135", "136", "196", "197", "470", "471", "700"], "met": "starter" } }, From 948c11ce82f90db041417fb4967de3e3a30d3c3f Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 16:45:07 +0900 Subject: [PATCH 11/14] fix(evolve): cross-gen ensurePokemonInDB fallback for non-native source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the active generation's pokemon DB does not contain the source pokemon (e.g. Eevee #133 in a gen4-active save), checkEvolution, getEligibleBranches, applyBranchEvolution and applySingleChainEvolution all returned null/empty because `db.pokemon[baseId]` is undefined. That left test-evolve --setup branch-eevee + Claude-driven evolve failing with "현재 이브이의 진화 조건을 만족하는 경로가 없다" on every scenario that used a cross-gen pokemon — and would also affect any real-gameplay path where a user's party member came from another generation via migration. Fix: every access of the source pokemon's data now falls back to ensurePokemonInDB, which transparently walks the other generations' data and injects the missing pokemon into the active gen's cache. applyBranchEvolution also applies the same fallback for the target data so cross-gen branch targets resolve. Also correct branch-eevee.json: the crawled data only contains the three stone-based Eeveelutions (Vaporeon/Jolteon/Flareon), so the scenario's evolution_options list is trimmed to match reality. 
The later Eeveelutions (Espeon/Umbreon/Leafeon/Glaceon/Sylveon) are a data-crawler backlog item, not an overflow-path regression. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/evolution.ts | 17 ++++++++++++----- src/test-scenarios/branch-eevee.json | 4 ++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/core/evolution.ts b/src/core/evolution.ts index a43c02ec..dfce3497 100644 --- a/src/core/evolution.ts +++ b/src/core/evolution.ts @@ -37,7 +37,8 @@ export function checkEvolution( state?: State, ): EvolutionResult | null { const db = getPokemonDB(); - const data = db.pokemon[toBaseId(pokemonName)]; + const baseId = toBaseId(pokemonName); + const data = db.pokemon[baseId] ?? ensurePokemonInDB(baseId) ?? undefined; if (!data) return null; // Branching evolution: block auto-evolve, set flags on state. @@ -170,7 +171,10 @@ export function getEligibleBranches( context: EvolutionContext, ): BranchInfo[] { const db = getPokemonDB(); - const data = db.pokemon[toBaseId(pokemonName)]; + const baseId = toBaseId(pokemonName); + // Cross-gen fallback: load the source pokemon into the active generation's + // DB when it originates from another gen (e.g. Eevee in a gen4 save). + const data = db.pokemon[baseId] ?? ensurePokemonInDB(baseId) ?? undefined; if (!data || !Array.isArray(data.evolves_to)) return []; return (data.evolves_to as BranchEvolution[]).map(branch => ({ @@ -190,13 +194,15 @@ export function applyBranchEvolution( targetName: string, ): EvolutionResult | null { const db = getPokemonDB(); - const data = db.pokemon[toBaseId(pokemonName)]; + const baseId = toBaseId(pokemonName); + const data = db.pokemon[baseId] ?? ensurePokemonInDB(baseId) ?? undefined; if (!data || !Array.isArray(data.evolves_to)) return null; const branch = (data.evolves_to as BranchEvolution[]).find(b => b.name === targetName); if (!branch) return null; - const targetData = db.pokemon[targetName]; + // Cross-gen fallback for the target data too (Vaporeon etc. 
may live in gen1). + const targetData = db.pokemon[targetName] ?? ensurePokemonInDB(targetName) ?? undefined; if (!targetData) return null; // Block re-evolution if direct evolved form already in unlocked (defense-in-depth) @@ -234,7 +240,8 @@ export function applySingleChainEvolution( targetName: string, ): EvolutionResult | null { const db = getPokemonDB(); - const data = db.pokemon[toBaseId(pokemonName)]; + const baseId = toBaseId(pokemonName); + const data = db.pokemon[baseId] ?? ensurePokemonInDB(baseId) ?? undefined; if (!data) return null; // Must be single-chain (not branching) diff --git a/src/test-scenarios/branch-eevee.json b/src/test-scenarios/branch-eevee.json index cd473102..3ae4c39c 100644 --- a/src/test-scenarios/branch-eevee.json +++ b/src/test-scenarios/branch-eevee.json @@ -1,6 +1,6 @@ { "name": "branch-eevee", - "description": "Eevee (#133) full 8-way branch evolution. 3 eligible via seeded stones (water/thunder/fire), 2 via friendship (Espeon/Umbreon), 3 ineligible without extra context (Leafeon location, Glaceon location, Sylveon fairy-move). Exercises the 4+ overflow rule: first 3 eligible as buttons + Refuse, remaining listed as 'Other forms' in question body.", + "description": "Eevee (#133) branch evolution — 3 eligible branches in the data (Vaporeon/Jolteon/Flareon via stones). All 3 fit as buttons alongside Refuse (4 options total, no overflow). 
Cross-gen data path: Eevee is gen1-only, so this also exercises the cross-gen ensurePokemonInDB fallback when the save is on gen4+.", "seed": { "party": ["133"], "pokemon": { @@ -10,7 +10,7 @@ "xp": 15625, "friendship": 220, "evolution_ready": true, - "evolution_options": ["134", "135", "136", "196", "197", "470", "471", "700"], + "evolution_options": ["134", "135", "136"], "met": "starter" } }, From 9f636a1d9e7f79aa8f3f718318033bf1e7924899 Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 16:53:31 +0900 Subject: [PATCH 12/14] fix(evolve): single-chain support in cmdEvolve + cross-gen target fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live scenario testing surfaced two related failures: 1. cmdEvolve only dispatched through getEligibleBranches, so pokemon whose data.evolves_to is a string (or legacy line[stage+1]) — i.e. every non-branching pokemon — hit "no eligible" and returned without invoking executeEvolve. That defeated the Stop-hook AskUserQuestion flow for any single-chain pokemon: the user would see the prompt, click the target, and nothing would happen. Add a dedicated single-chain branch in cmdEvolve that routes through checkEvolution (no state → returns a validated EvolutionResult) and then executeEvolve, which already dispatches to applySingleChainEvolution under the hood. The branch runs ahead of the getEligibleBranches path so branching pokemon still use the original flow. 2. checkEvolution's string-evolves_to path only called ensurePokemonInDB for explicit cross-gen references like "gen1:25". Plain numeric IDs that happened to live only in another generation (e.g. Charmeleon #5 on a gen4-active save) fell through and returned null because targetData was undefined. Same issue on the legacy line[stage+1] path. Both paths now use the ensurePokemonInDB fallback so cross-gen targets resolve. 
Also seed the required evolution stones on the multi-3 and overflow-5 scenarios so Pikachu (thunder-stone) and Eevee (water/thunder/fire stones) can actually evolve when chosen. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/cli/tokenmon.ts | 27 +++++++++++++++++++++++++-- src/core/evolution.ts | 5 ++++- src/test-scenarios/multi-3.json | 3 ++- src/test-scenarios/overflow-5.json | 3 ++- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/cli/tokenmon.ts b/src/cli/tokenmon.ts index b3c7b51e..5a935eb3 100644 --- a/src/cli/tokenmon.ts +++ b/src/cli/tokenmon.ts @@ -4,14 +4,14 @@ import { readFileSync, existsSync } from 'fs'; import { join } from 'path'; import { readState, writeState } from '../core/state.js'; import { readConfig, writeConfig, getDefaultConfig, readGlobalConfig, writeGlobalConfig } from '../core/config.js'; -import { getPokemonDB, getAchievementsDB, getAchievementName, getAchievementDescription, getAchievementRarityLabel, getRegionName, getRegionDescription, getPokemonName, getGenerationsDB, invalidateGenCache, pokemonIdByName, resolveNameToId, getDisplayName, formatMetInfo } from '../core/pokemon-data.js'; +import { getPokemonDB, getAchievementsDB, getAchievementName, getAchievementDescription, getAchievementRarityLabel, getRegionName, getRegionDescription, getPokemonName, getGenerationsDB, invalidateGenCache, pokemonIdByName, resolveNameToId, getDisplayName, formatMetInfo, ensurePokemonInDB } from '../core/pokemon-data.js'; import { levelToXp } from '../core/xp.js'; import { playCry } from '../audio/play-cry.js'; import { getCompletion, getPokedexList, syncPokedexFromUnlocked, getRegionSummary } from '../core/pokedex.js'; import { getBoxList } from '../core/box.js'; import { getCurrentRegion, getRegionList, moveToRegion } from '../core/regions.js'; import { renderGuide, renderGuideIndex } from '../core/guide.js'; -import { getEligibleBranches, applyBranchEvolution, applySingleChainEvolution } from '../core/evolution.js'; +import { 
getEligibleBranches, applyBranchEvolution, applySingleChainEvolution, checkEvolution } from '../core/evolution.js'; import { getActiveNotifications, dismissAll } from '../core/notifications.js'; import { getActiveEvents } from '../core/encounter.js'; import { getEventsDB, getRegionsDB, getPokedexRewardsDB } from '../core/pokemon-data.js'; @@ -989,6 +989,29 @@ function cmdEvolve(pokemonArg?: string, targetArg?: string): void { unlockedAchievements: Object.keys(state.achievements).filter(k => state.achievements[k]), items: state.items ?? {}, }; + + // Single-chain path: data.evolves_to is a string (or legacy line[stage+1]). + // cmdEvolve originally only handled branch evolutions, so single-chain + // pokemon reached the AskUserQuestion prompt from the Stop-hook block but + // could not actually complete the evolve. Route single-chain through + // checkEvolution (no state → returns an EvolutionResult with the resolved + // target) and then executeEvolve's dispatcher, which calls + // applySingleChainEvolution. + const baseData = pokemonDB.pokemon[toBaseId(pokemonArg)] ?? 
ensurePokemonInDB(toBaseId(pokemonArg)); + if (baseData && !Array.isArray(baseData.evolves_to)) { + const result = checkEvolution(pokemonArg, ctx); + if (!result) { + warn(t('cli.evolve.no_eligible', { pokemon: getPokemonName(pokemonArg) })); + return; + } + if (targetArg && targetArg !== result.newPokemon) { + error(t('cli.evolve.invalid_target', { target: targetArg })); + return; + } + executeEvolve(pokemonArg, result.newPokemon, config); + return; + } + const branches = getEligibleBranches(pokemonArg, ctx); // UX-only: hide branches whose evolved form is already in unlocked (safety guards are in checkEvolution/applyBranchEvolution) const eligible = branches.filter(b => { diff --git a/src/core/evolution.ts b/src/core/evolution.ts index dfce3497..e6f7446b 100644 --- a/src/core/evolution.ts +++ b/src/core/evolution.ts @@ -78,6 +78,9 @@ export function checkEvolution( if (crossRef) { targetName = crossRef.id; targetData = ensurePokemonInDB(targetName) ?? undefined; + } else if (!targetData) { + // Plain ID that's not in the active gen's db — try cross-gen injection. + targetData = ensurePokemonInDB(targetName) ?? undefined; } if (!targetData) return null; @@ -111,7 +114,7 @@ export function checkEvolution( const nextStage = data.stage + 1; if (nextStage >= data.line.length) return null; const nextPokemon = data.line[nextStage]; - const nextData = db.pokemon[nextPokemon]; + const nextData = db.pokemon[nextPokemon] ?? ensurePokemonInDB(nextPokemon) ?? 
undefined; if (!nextData) return null; // Block re-evolution if direct evolved form already in unlocked diff --git a/src/test-scenarios/multi-3.json b/src/test-scenarios/multi-3.json index 38d9aa13..4c048bdb 100644 --- a/src/test-scenarios/multi-3.json +++ b/src/test-scenarios/multi-3.json @@ -32,7 +32,8 @@ "met": "wild" } }, - "unlocked": ["4", "7", "25"] + "unlocked": ["4", "7", "25"], + "items": { "thunder-stone": 1 } }, "expected_block": { "decision": "block", diff --git a/src/test-scenarios/overflow-5.json b/src/test-scenarios/overflow-5.json index 42bc3167..24035c30 100644 --- a/src/test-scenarios/overflow-5.json +++ b/src/test-scenarios/overflow-5.json @@ -50,7 +50,8 @@ "met": "wild" } }, - "unlocked": ["4", "7", "25", "133", "172"] + "unlocked": ["4", "7", "25", "133", "172"], + "items": { "water-stone": 1, "thunder-stone": 1, "fire-stone": 1 } }, "expected_block": { "decision": "block", From 31eb611a5a66df34a8fc1130c797cb45b97639b4 Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 17:06:56 +0900 Subject: [PATCH 13/14] fix(stop): append test-harness verify+restore to block reason when active The test-evolve skill only lives in Claude's context during the turn that invokes /tkm:test-evolve (setup turn). On the follow-up turn where the Stop hook emits the evolution block and Claude renders AskUserQuestion, the skill content has usually dropped out of the prompt window, so Claude hands control back to the user after the evolve call instead of running the auto verify + auto restore tail the skill required. When stop.ts detects the harness marker file ( .tokenmon/test-backup/current.json), append an explicit "[TEST HARNESS ACTIVE]" block to the reason that spells out the two trailing commands with their absolute paths (the dev flow's own PLUGIN_ROOT + bin/tsx-resolve.sh + src/cli/test-evolve.ts), plus a reminder to print the final report. That keeps the cycle self-closing whether or not the skill-turn history is still in context. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/hooks/stop.ts | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/hooks/stop.ts b/src/hooks/stop.ts index 6093160c..cc82656e 100644 --- a/src/hooks/stop.ts +++ b/src/hooks/stop.ts @@ -17,7 +17,7 @@ import { addItem, randInt, getDropRateMultiplier } from '../core/items.js'; import { getRegionDropMessage } from '../core/region-messages.js'; import { getVolumeTier, getVolumeTierByName } from '../core/volume-tier.js'; import { withLock, withLockRetry } from '../core/lock.js'; -import { setActiveGenerationCache, getActiveGeneration } from '../core/paths.js'; +import { setActiveGenerationCache, getActiveGeneration, DATA_DIR, PLUGIN_ROOT } from '../core/paths.js'; import { isShinyKey, toBaseId, toShinyKey } from '../core/shiny-utils.js'; import { recordXp, recordBattle, recordCatch, recordEncounter, recordShinyEncounter, recordShinyCatch, recordShinyEscaped } from '../core/stats.js'; import { loadGymData } from '../core/gym.js'; @@ -605,7 +605,27 @@ async function main(): Promise { targets: c.options.map(o => getPokemonName(o)).join(', '), })) .join('\n'); - const reason = t('hook.evolution_block_reason', { candidateList }); + let reason = t('hook.evolution_block_reason', { candidateList }); + // Test-harness awareness: when the dev /tkm:test-evolve harness has an + // active cycle (marker file at .tokenmon/test-backup/current.json), + // tack on the verify + restore instructions so Claude closes the + // cycle inside the same turn even if the skill-invocation context + // has fallen out of the current prompt window. 
+ try { + const currentPtr = join(DATA_DIR, 'test-backup', 'current.json'); + if (existsSync(currentPtr)) { + const tsxResolve = join(PLUGIN_ROOT, 'bin', 'tsx-resolve.sh'); + const testCli = join(PLUGIN_ROOT, 'src', 'cli', 'test-evolve.ts'); + reason += '\n\n[TEST HARNESS ACTIVE] ' + + 'After you run tokenmon evolve (or after a Refuse), you MUST, in the same turn without stopping, run exactly:\n' + + ` "${tsxResolve}" "${testCli}" --verify\n` + + ` "${tsxResolve}" "${testCli}" --restore\n` + + 'Show both outputs to the user, then print a compact final report (scenario name, pick, verify verdict, restore confirmation).'; + } + } catch { + // Harness check is best-effort; missing paths/stat errors must not + // suppress the core evolution block. + } playCry(); // Preserve level-up/achievement messages from the parent lock; systemMessage is // user-facing only, so merging it here does not interfere with the block reason From 24aecbf8d37f994d6008d3668db52db54711fc0c Mon Sep 17 00:00:00 2001 From: Sangwon Lee Date: Mon, 20 Apr 2026 21:52:43 +0900 Subject: [PATCH 14/14] fix: address review comments (P1 single-chain, P2 hooks cache lookup + achievements) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three review findings from review-comments.md: 1. [P1] applySingleChainEvolution now falls back to ensurePokemonInDB for plain numeric cross-gen targets, not just the explicit `genN:id` crossGenRef syntax. Matches the fallback already in checkEvolution so every Stop-hook evolution that asks the user can actually resolve when the target species lives in another generation's dex. The legacy line[stage+1] branch picks up the same fallback for symmetry. 2. [P2] getInstalledHooksPath in src/test-evolve/backup.ts now searches ~/.claude/plugins/cache/tkm/tkm/<version>/hooks/hooks.json in addition to the worktree PLUGIN_ROOT and the marketplaces tree. 
That is where the release install actually lives — the prior helper threw before creating backups on any machine installed via the cache path, which was exactly the setup the new harness documents. 3. [P2] cmdAchievements now reads commonState in addition to state.achievements and merges entries from getCommonAchievementsDB so cross-generation achievements such as all_gen_badges stay in the listing. Before this patch the command only consulted the active-gen table so common achievements silently disappeared once a user unlocked them. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/cli/tokenmon.ts | 31 ++++++++++++++++++++++++------- src/core/evolution.ts | 7 ++++++- src/test-evolve/backup.ts | 22 +++++++++++++++++++++- 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/src/cli/tokenmon.ts b/src/cli/tokenmon.ts index 5a935eb3..2724ad49 100644 --- a/src/cli/tokenmon.ts +++ b/src/cli/tokenmon.ts @@ -2,9 +2,9 @@ import * as readline from 'readline'; import { readFileSync, existsSync } from 'fs'; import { join } from 'path'; -import { readState, writeState } from '../core/state.js'; +import { readState, writeState, readCommonState } from '../core/state.js'; import { readConfig, writeConfig, getDefaultConfig, readGlobalConfig, writeGlobalConfig } from '../core/config.js'; -import { getPokemonDB, getAchievementsDB, getAchievementName, getAchievementDescription, getAchievementRarityLabel, getRegionName, getRegionDescription, getPokemonName, getGenerationsDB, invalidateGenCache, pokemonIdByName, resolveNameToId, getDisplayName, formatMetInfo, ensurePokemonInDB } from '../core/pokemon-data.js'; +import { getPokemonDB, getAchievementsDB, getCommonAchievementsDB, getAchievementName, getAchievementDescription, getAchievementRarityLabel, getRegionName, getRegionDescription, getPokemonName, getGenerationsDB, invalidateGenCache, pokemonIdByName, resolveNameToId, getDisplayName, formatMetInfo, ensurePokemonInDB } from '../core/pokemon-data.js'; import { levelToXp } 
from '../core/xp.js'; import { playCry } from '../audio/play-cry.js'; import { getCompletion, getPokedexList, syncPokedexFromUnlocked, getRegionSummary } from '../core/pokedex.js'; @@ -372,19 +372,36 @@ function cmdUnlockList(): void { function cmdAchievements(): void { const state = readState(); + const commonState = readCommonState(); const achDB = getAchievementsDB(); + const commonDB = getCommonAchievementsDB(); bold(t('cli.achievements.header')); console.log(''); + // Gen-specific achievements come from the active generation's state map, + // common/cross-generation achievements (e.g. all_gen_badges) live in + // commonState.achievements and must be surfaced alongside so previously + // unlocked common titles do not silently disappear from the listing. + const seen = new Set(); + const rows: Array<{ id: string; unlocked: boolean }> = []; for (const ach of achDB.achievements) { - const achieved = !!state.achievements[ach.id]; - if (achieved) { - console.log(` ${GREEN}✓${RESET} ${BOLD}${getAchievementName(ach.id)}${RESET} ${getAchievementRarityLabel(ach.id)}`); + rows.push({ id: ach.id, unlocked: !!state.achievements[ach.id] }); + seen.add(ach.id); + } + for (const ach of commonDB.achievements) { + if (seen.has(ach.id)) continue; + rows.push({ id: ach.id, unlocked: !!commonState.achievements[ach.id] }); + seen.add(ach.id); + } + + for (const row of rows) { + if (row.unlocked) { + console.log(` ${GREEN}✓${RESET} ${BOLD}${getAchievementName(row.id)}${RESET} ${getAchievementRarityLabel(row.id)}`); } else { - console.log(` ${GRAY}○ ${getAchievementName(ach.id)} ${getAchievementRarityLabel(ach.id)}${RESET}`); + console.log(` ${GRAY}○ ${getAchievementName(row.id)} ${getAchievementRarityLabel(row.id)}${RESET}`); } - console.log(` ${GRAY}${getAchievementDescription(ach.id)}${RESET}`); + console.log(` ${GRAY}${getAchievementDescription(row.id)}${RESET}`); console.log(''); } } diff --git a/src/core/evolution.ts b/src/core/evolution.ts index e6f7446b..538f5790 100644 --- 
a/src/core/evolution.ts +++ b/src/core/evolution.ts @@ -261,13 +261,18 @@ export function applySingleChainEvolution( if (crossRef) { resolvedTarget = crossRef.id; targetData = ensurePokemonInDB(resolvedTarget) ?? undefined; + } else if (!targetData) { + // Plain numeric ID target that only lives in another generation's dex + // (e.g. Charmeleon #5 on a gen4-active save). Pull it in so single-chain + // evolutions complete instead of erroring out after the prompt. + targetData = ensurePokemonInDB(resolvedTarget) ?? undefined; } } else { // Legacy path: line[stage+1] const nextStage = data.stage + 1; if (nextStage < data.line.length) { resolvedTarget = data.line[nextStage]; - targetData = db.pokemon[resolvedTarget]; + targetData = db.pokemon[resolvedTarget] ?? ensurePokemonInDB(resolvedTarget) ?? undefined; } } diff --git a/src/test-evolve/backup.ts b/src/test-evolve/backup.ts index c2f0af52..441be6ef 100644 --- a/src/test-evolve/backup.ts +++ b/src/test-evolve/backup.ts @@ -34,14 +34,34 @@ export interface BackupManifest { * harness fails loudly instead of writing to a non-existent path. */ export function getInstalledHooksPath(): string { + const checked: string[] = []; + const pluginRootHooks = join(PLUGIN_ROOT, 'hooks', 'hooks.json'); + checked.push(pluginRootHooks); if (existsSync(pluginRootHooks)) return pluginRootHooks; const marketplaceHooks = join(homedir(), '.claude', 'plugins', 'marketplaces', 'tkm', 'hooks', 'hooks.json'); + checked.push(marketplaceHooks); if (existsSync(marketplaceHooks)) return marketplaceHooks; + // Cached install location: ~/.claude/plugins/cache/tkm/tkm/<version>/hooks/hooks.json + // Scan every installed version directory so a release-style install still works. 
+ const cacheBase = join(homedir(), '.claude', 'plugins', 'cache', 'tkm', 'tkm'); + checked.push(join(cacheBase, '', 'hooks', 'hooks.json')); + if (existsSync(cacheBase)) { + try { + const versions = readdirSync(cacheBase).sort().reverse(); + for (const v of versions) { + const candidate = join(cacheBase, v, 'hooks', 'hooks.json'); + if (existsSync(candidate)) return candidate; + } + } catch { + // Directory read errors fall through to the throw below. + } + } + throw new Error( - `Cannot locate active hooks.json. Checked:\n - ${pluginRootHooks}\n - ${marketplaceHooks}\n` + + `Cannot locate active hooks.json. Checked:\n${checked.map(p => ` - ${p}`).join('\n')}\n` + `Set CLAUDE_PLUGIN_ROOT to your tkm install location and retry.`, ); }