From 1fdeac2caa4d91851c084ee331303b629ea45bb8 Mon Sep 17 00:00:00 2001 From: Marshall Livingston Date: Thu, 26 Feb 2026 15:19:08 -0700 Subject: [PATCH] fix: stop lying to users when analysis parsing fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the analyzer LLM returns truncated JSON, parseAnalysisResponse was catching the error, printing to stderr where nobody reads it, and returning a zeroed-out result. Every caller then printed "Analysis complete" with a green checkmark. And then every load site that read the result back from disk would render all-zero scores as if the model genuinely scored zero on everything. Added a parseFailed flag to AnalysisResult so callers can distinguish "the model scored you low" from "we couldn't even read the response." All three analysis call sites now show a warning with retry instructions. Added a shared loadAnalysisResult helper that returns null for parseFailed results — all five disk load sites (results list, compare, challenge view, report, interactive browser) now use it instead of inline JSON.parse. Failed analyses show as "-" in tables and are excluded from reports, same as if no analysis was run. Closes #56 --- src/commands/analyze.ts | 7 ++++++- src/commands/report.ts | 10 +++------- src/commands/results.ts | 24 ++++-------------------- src/commands/run.ts | 7 ++++++- src/interactive/helpers.ts | 9 ++------- src/interactive/run-flow.ts | 7 ++++++- src/lib/analyzer.ts | 3 +-- src/lib/runner.ts | 17 ++++++++++++++++- src/lib/types.ts | 1 + 9 files changed, 45 insertions(+), 40 deletions(-) diff --git a/src/commands/analyze.ts b/src/commands/analyze.ts index 0cd7de4..671a4df 100644 --- a/src/commands/analyze.ts +++ b/src/commands/analyze.ts @@ -153,7 +153,12 @@ export const analyzeCommand = new Command('analyze') }); saveAnalysisResult(id, analysis, getResultsDir()); - spinner.succeed(`Analysis complete for ${id}`); + + if (analysis.parseFailed) { + spinner.warn(`Analysis failed for ${id} — could not parse LLM response (truncated or malformed JSON)`); + } else { + spinner.succeed(`Analysis complete for ${id}`); + } // Print summary for single runs if (runIds.length === 1) { diff --git a/src/commands/report.ts b/src/commands/report.ts index 6e0681d..8904b8b 100644 --- a/src/commands/report.ts +++ b/src/commands/report.ts @@ -5,6 +5,7 @@ import { colors, status } from '../lib/display.js'; import { getResultsDir } from '../lib/config.js'; import { calculateKSM, calculateEfficacyFromResults, getTokenEfficiency } from '../lib/scoring.js'; import { resolveAnalysisPath, resolveResultPath, InvalidRunIdError, ResultPathEscapeError } from '../lib/results-path.js'; +import { loadAnalysisResult } from '../lib/runner.js'; import { copyToClipboard, printColorReport, @@ -74,13 +75,8 @@ export const reportCommand = new Command('report') process.exit(1); } - // Load analysis if available - let analysis: AnalysisResult | undefined; - if (existsSync(analysisPath)) { - try { - analysis = JSON.parse(readFileSync(analysisPath, 'utf-8')); - } catch {} - } + // Load analysis if available (null if parse failed) + const analysis = loadAnalysisResult(analysisPath) ?? undefined; const format = options.format.toLowerCase(); let output = ''; diff --git a/src/commands/results.ts b/src/commands/results.ts index 02f3ac9..62a8d03 100644 --- a/src/commands/results.ts +++ b/src/commands/results.ts @@ -6,6 +6,7 @@ import { colors, status, formatScore, formatTime, printBox, sectionHeader } from import { calculateKSM, calculateEfficacyFromResults, getTokenEfficiency } from '../lib/scoring.js'; import { getResultsDir, getChallengesDir } from '../lib/config.js'; import { resolveAnalysisPath, resolveResultPath, InvalidRunIdError, ResultPathEscapeError } from '../lib/results-path.js'; +import { loadAnalysisResult } from '../lib/runner.js'; import type { RunResult, AnalysisResult, ChallengeConfig } from '../lib/types.js'; export const resultsCommand = new Command('results') @@ -55,12 +56,7 @@ resultsCommand if (options.challenge && result.challenge !== options.challenge) continue; const analysisPath = pathResolve(getResultsDir(), `${file.id}.analysis.json`); - let analysis: AnalysisResult | null = null; - if (existsSync(analysisPath)) { - try { - analysis = JSON.parse(readFileSync(analysisPath, 'utf-8')); - } catch {} - } + const analysis = loadAnalysisResult(analysisPath); loaded.push({ id: file.id, result, analysis }); } catch { @@ -314,13 +310,7 @@ resultsCommand const filePath = pathResolve(resultsDir, file); const result: RunResult = JSON.parse(readFileSync(filePath, 'utf-8')); const analysisPath = pathResolve(resultsDir, file.replace('.json', '.analysis.json')); - let analysis: AnalysisResult | null = null; - - if (existsSync(analysisPath)) { - try { - analysis = JSON.parse(readFileSync(analysisPath, 'utf-8')); - } catch {} - } + const analysis = loadAnalysisResult(analysisPath); allLoadedEntries.push({ result, analysis }); } catch {} @@ -512,13 +502,7 @@ function compareByChallengeId(challengeId: string): void { if (result.challenge !== challengeId) continue; const analysisPath = pathResolve(resultsDir, file.replace('.json', '.analysis.json')); - let analysis: AnalysisResult | null = null; - - if (existsSync(analysisPath)) { - try { - analysis = JSON.parse(readFileSync(analysisPath, 'utf-8')); - } catch {} - } + const analysis = loadAnalysisResult(analysisPath); loadedEntries.push({ result, analysis }); } catch {} diff --git a/src/commands/run.ts b/src/commands/run.ts index 483fed1..6816d3e 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -349,7 +349,12 @@ export const runCommand = new Command('run') }); const { jsonPath: analysisPath } = saveAnalysisResult(result.id, analysis, getResultsDir()); - spinnerAnalysis.succeed('Analysis complete'); + + if (analysis.parseFailed) { + spinnerAnalysis.warn('Analysis failed — could not parse LLM response (truncated or malformed JSON). Retry with: oasis analyze ' + result.id); + } else { + spinnerAnalysis.succeed('Analysis complete'); + } // Print analysis summary printAnalysisSummary(analysis); diff --git a/src/interactive/helpers.ts b/src/interactive/helpers.ts index 099ee2f..6dd5afc 100644 --- a/src/interactive/helpers.ts +++ b/src/interactive/helpers.ts @@ -3,6 +3,7 @@ import { resolve } from 'path'; import { getChallengesDir, getResultsDir } from '../lib/config.js'; import { calculateKSM, calculateEfficacyFromResults, getTokenEfficiency } from '../lib/scoring.js'; import { fetchRegistryIndex, fetchChallengeConfig } from '../lib/registry.js'; +import { loadAnalysisResult } from '../lib/runner.js'; import type { RegistryEntry } from '../lib/registry.js'; import type { ChallengeConfig, RunResult, AnalysisResult } from '../lib/types.js'; @@ -111,13 +112,7 @@ export function loadRecentResults(limit = 20): LoadedResult[] { try { const result: RunResult = JSON.parse(readFileSync(file.path, 'utf-8')); const analysisPath = resolve(dir, `${file.id}.analysis.json`); - let analysis: AnalysisResult | null = null; - - if (existsSync(analysisPath)) { - try { - analysis = JSON.parse(readFileSync(analysisPath, 'utf-8')); - } catch { /* skip */ } - } + const analysis = loadAnalysisResult(analysisPath); allEntries.push({ id: file.id, result, analysis }); } catch { /* skip malformed */ } diff --git a/src/interactive/run-flow.ts b/src/interactive/run-flow.ts index 9994f70..655097e 100644 --- a/src/interactive/run-flow.ts +++ b/src/interactive/run-flow.ts @@ -552,7 +552,12 @@ export async function runBenchmarkFlow(): Promise { runAnalysisResult = analysis; const { jsonPath: analysisPath } = saveAnalysisResult(result.id, analysis, getResultsDir()); - spinnerAnalysis.succeed('Analysis complete'); + + if (analysis.parseFailed) { + spinnerAnalysis.warn('Analysis failed — could not parse LLM response (truncated or malformed JSON). Retry with: oasis analyze ' + result.id); + } else { + spinnerAnalysis.succeed('Analysis complete'); + } printAnalysisSummary(analysis); diff --git a/src/lib/analyzer.ts b/src/lib/analyzer.ts index b7a43ff..83d0133 100644 --- a/src/lib/analyzer.ts +++ b/src/lib/analyzer.ts @@ -334,12 +334,11 @@ async function parseAnalysisResponse( return analysisResult; } catch (error) { - console.error('Failed to parse analysis response:', error); - return { runId, analyzedAt: new Date(), analyzerModel: DEFAULT_ANALYZER_MODEL, + parseFailed: true, attackChain: { phases: [], techniques: [], killChainCoverage: [] }, narrative: { summary: 'Analysis parsing failed', detailed: `Error: ${error}`, keyFindings: [] }, behavior: { approach: 'exploratory', approachDescription: 'Unable to determine', strengths: [], inefficiencies: [], decisionQuality: 0 }, diff --git a/src/lib/runner.ts b/src/lib/runner.ts index 94ffd22..b373683 100644 --- a/src/lib/runner.ts +++ b/src/lib/runner.ts @@ -4,7 +4,7 @@ import Anthropic from '@anthropic-ai/sdk'; import OpenAI from 'openai'; import { execFileSync } from 'child_process'; import chalk from 'chalk'; -import { writeFileSync, mkdirSync, existsSync } from 'fs'; +import { writeFileSync, readFileSync, mkdirSync, existsSync } from 'fs'; import { randomUUID } from 'crypto'; import { resolve } from 'path'; import { wasSuccessful, classifyToAttack, classifyCommand } from './classifier.js'; @@ -797,3 +797,18 @@ export function saveAnalysisResult( return { jsonPath, txtPath }; } + +/** + * Load an analysis result from disk. Returns null if the file doesn't exist, + * can't be parsed, or the analysis itself failed (parseFailed). + */ +export function loadAnalysisResult(analysisPath: string): AnalysisResult | null { + if (!existsSync(analysisPath)) return null; + try { + const analysis: AnalysisResult = JSON.parse(readFileSync(analysisPath, 'utf-8')); + if (analysis.parseFailed) return null; + return analysis; + } catch { + return null; + } +} diff --git a/src/lib/types.ts b/src/lib/types.ts index 3c67738..fe6ee76 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -142,6 +142,7 @@ export interface AnalysisResult { scoreBreakdown: string; }; rubricScore?: RubricScore; + parseFailed?: boolean; } // =============================================================================