Skip analysis and score summaries when analysis.parseFailed is set.

Touches src/commands/analyze.ts, src/commands/run.ts,
src/interactive/results-flow.ts and src/interactive/run-flow.ts so that
printAnalysisSummary / printScoreSummary are only called when
analysis.parseFailed is falsy; analyze.ts (multi-run case) and
results-flow.ts print a fallback line instead. Adds
tests/unit/export.test.ts covering promptExport with undefined,
parse-failed, and valid analyses.

NOTE(review): this patch was rebuilt from a copy in which all newlines and
indentation had been collapsed and generic type arguments had been stripped.
Hunk line counts were re-checked against the @@ headers, but indentation is
a best guess -- apply with `git apply --ignore-whitespace` or confirm against
the target files. Restored type arguments (Partial<AnalysisResult>,
ReturnType<typeof vi.spyOn>, ReturnType<typeof vi.fn>, Promise<void>) are
inferred from usage -- TODO confirm. Leading spaces inside template literals
are kept as found and may originally have been wider.

diff --git a/src/commands/analyze.ts b/src/commands/analyze.ts
index 671a4df..eac7211 100644
--- a/src/commands/analyze.ts
+++ b/src/commands/analyze.ts
@@ -160,13 +160,17 @@ export const analyzeCommand = new Command('analyze')
           spinner.succeed(`Analysis complete for ${id}`);
         }
 
-        // Print summary for single runs
-        if (runIds.length === 1) {
-          printAnalysisSummary(analysis);
-        } else {
-          const methodology = analysis.rubricScore?.percentage ?? analysis.strategy.overallScore;
-          const score = calculateKSM(methodology, calculateEfficacyFromResults(result.challenge, result.modelVersion, allResults), getTokenEfficiency(result));
-          console.log(colors.gray(` Score: ${score}/100 | Approach: ${analysis.behavior.approach}`));
+        if (!analysis.parseFailed) {
+          // Print summary for single runs
+          if (runIds.length === 1) {
+            printAnalysisSummary(analysis);
+          } else {
+            const methodology = analysis.rubricScore?.percentage ?? analysis.strategy.overallScore;
+            const score = calculateKSM(methodology, calculateEfficacyFromResults(result.challenge, result.modelVersion, allResults), getTokenEfficiency(result));
+            console.log(colors.gray(` Score: ${score}/100 | Approach: ${analysis.behavior.approach}`));
+          }
+        } else if (runIds.length > 1) {
+          console.log(colors.gray(` Score: N/A (parse failed)`));
         }
       } catch (error) {
         if (error instanceof QuotaExceededError) {
diff --git a/src/commands/run.ts b/src/commands/run.ts
index c4808ad..4dc398d 100644
--- a/src/commands/run.ts
+++ b/src/commands/run.ts
@@ -356,18 +356,20 @@ export const runCommand = new Command('run')
             spinnerAnalysis.succeed('Analysis complete');
           }
 
-          // Print analysis summary
-          printAnalysisSummary(analysis);
-
-          // Print score summary
-          const methodology = analysis.rubricScore?.percentage ?? analysis.strategy.overallScore;
-          const efficacy = calculateEfficacy(result.challenge, result.modelVersion, getResultsDir());
-          printScoreSummary({
-            ksm: calculateKSM(methodology, efficacy, getTokenEfficiency(result)),
-            efficacy,
-            efficiency: analysis.rubricScore?.percentage ?? analysis.strategy.exploitEfficiency ?? 0,
-            time: result.totalTime,
-          });
+          if (!analysis.parseFailed) {
+            // Print analysis summary
+            printAnalysisSummary(analysis);
+
+            // Print score summary
+            const methodology = analysis.rubricScore?.percentage ?? analysis.strategy.overallScore;
+            const efficacy = calculateEfficacy(result.challenge, result.modelVersion, getResultsDir());
+            printScoreSummary({
+              ksm: calculateKSM(methodology, efficacy, getTokenEfficiency(result)),
+              efficacy,
+              efficiency: analysis.rubricScore?.percentage ?? analysis.strategy.exploitEfficiency ?? 0,
+              time: result.totalTime,
+            });
+          }
 
           console.log(colors.gray(`Analysis saved to: ${analysisPath}`));
         } catch (analysisError) {
diff --git a/src/interactive/results-flow.ts b/src/interactive/results-flow.ts
index 941849c..bd5d7a9 100644
--- a/src/interactive/results-flow.ts
+++ b/src/interactive/results-flow.ts
@@ -83,8 +83,10 @@ async function showRunDetail(entry: LoadedResult): Promise<void> {
 
     switch (action) {
       case 'analysis':
-        if (analysis) {
+        if (analysis && !analysis.parseFailed) {
           printAnalysisSummary(analysis);
+        } else if (analysis?.parseFailed) {
+          console.log(colors.gray(' Analysis could not be parsed. Retry with: oasis analyze ' + result.id));
         }
         break;
       case 'report':
diff --git a/src/interactive/run-flow.ts b/src/interactive/run-flow.ts
index 655097e..c86b029 100644
--- a/src/interactive/run-flow.ts
+++ b/src/interactive/run-flow.ts
@@ -559,17 +559,19 @@ export async function runBenchmarkFlow(): Promise<void> {
         spinnerAnalysis.succeed('Analysis complete');
       }
 
-      printAnalysisSummary(analysis);
-
-      const methodology = analysis.rubricScore?.percentage ?? analysis.strategy.overallScore;
-      const efficacy = calculateEfficacy(result.challenge, result.modelVersion, getResultsDir());
-      runKsmScore = calculateKSM(methodology, efficacy, getTokenEfficiency(result));
-      printScoreSummary({
-        ksm: runKsmScore,
-        efficacy,
-        efficiency: analysis.rubricScore?.percentage ?? analysis.strategy.exploitEfficiency ?? 0,
-        time: result.totalTime,
-      });
+      if (!analysis.parseFailed) {
+        printAnalysisSummary(analysis);
+
+        const methodology = analysis.rubricScore?.percentage ?? analysis.strategy.overallScore;
+        const efficacy = calculateEfficacy(result.challenge, result.modelVersion, getResultsDir());
+        runKsmScore = calculateKSM(methodology, efficacy, getTokenEfficiency(result));
+        printScoreSummary({
+          ksm: runKsmScore,
+          efficacy,
+          efficiency: analysis.rubricScore?.percentage ?? analysis.strategy.exploitEfficiency ?? 0,
+          time: result.totalTime,
+        });
+      }
 
       console.log(colors.gray(` Analysis saved to: ${analysisPath}`));
     } catch (analysisError) {
diff --git a/tests/unit/export.test.ts b/tests/unit/export.test.ts
new file mode 100644
index 0000000..7efeb30
--- /dev/null
+++ b/tests/unit/export.test.ts
@@ -0,0 +1,90 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import type { RunResult, AnalysisResult } from '../../src/lib/types.js';
+
+// Mock @inquirer/prompts before importing the module under test
+vi.mock('@inquirer/prompts', () => ({
+  select: vi.fn(),
+  input: vi.fn(),
+}));
+
+const { promptExport } = await import('../../src/lib/export.js');
+
+function makeRunResult(): RunResult {
+  return {
+    id: 'test-run-001',
+    model: 'anthropic',
+    modelVersion: 'claude-3',
+    challenge: 'gatekeeper',
+    startTime: new Date(),
+    endTime: new Date(),
+    success: false,
+    flag: null,
+    totalTime: 10,
+    iterations: 1,
+    tokens: { input: 0, output: 0, total: 0 },
+    steps: [],
+    techniquesUsed: [],
+    tacticBreakdown: {},
+    methodologies: [],
+    toolsUsed: [],
+    methodologyBreakdown: {},
+  };
+}
+
+function makeAnalysis(overrides: Partial<AnalysisResult> = {}): AnalysisResult {
+  return {
+    runId: 'test-run-001',
+    analyzedAt: new Date(),
+    analyzerModel: 'claude-3',
+    attackChain: { phases: [], techniques: [], killChainCoverage: [] },
+    narrative: { summary: 'Test', detailed: 'Test', keyFindings: [] },
+    behavior: { approach: 'methodical', approachDescription: '', strengths: [], inefficiencies: [], decisionQuality: 50 },
+    strategy: { reconQuality: 50, exploitEfficiency: 50, adaptability: 50, overallScore: 50, scoreBreakdown: '' },
+    ...overrides,
+  };
+}
+
+describe('promptExport', () => {
+  let consoleSpy: ReturnType<typeof vi.spyOn>;
+
+  beforeEach(() => {
+    consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
+  });
+
+  it('skips export when analysis is undefined', async () => {
+    await promptExport(makeRunResult(), undefined);
+    const output = consoleSpy.mock.calls.map(c => c[0]).join('\n');
+    expect(output).toContain('No analysis available');
+    expect(output).toContain('oasis analyze test-run-001');
+  });
+
+  it('skips export when analysis.parseFailed is true', async () => {
+    const analysis = makeAnalysis({ parseFailed: true });
+    await promptExport(makeRunResult(), analysis);
+    const output = consoleSpy.mock.calls.map(c => c[0]).join('\n');
+    expect(output).toContain('No analysis available');
+    expect(output).toContain('oasis analyze test-run-001');
+  });
+
+  it('does not skip export for valid analysis (parseFailed undefined)', async () => {
+    const { select } = await import('@inquirer/prompts');
+    (select as ReturnType<typeof vi.fn>).mockResolvedValueOnce('done');
+
+    const analysis = makeAnalysis();
+    await promptExport(makeRunResult(), analysis);
+    const output = consoleSpy.mock.calls.map(c => c[0]).join('\n');
+    expect(output).not.toContain('No analysis available');
+    expect(output).toContain('oasis report test-run-001');
+  });
+
+  it('does not skip export for explicit parseFailed: false', async () => {
+    const { select } = await import('@inquirer/prompts');
+    (select as ReturnType<typeof vi.fn>).mockResolvedValueOnce('done');
+
+    const analysis = makeAnalysis({ parseFailed: false });
+    await promptExport(makeRunResult(), analysis);
+    const output = consoleSpy.mock.calls.map(c => c[0]).join('\n');
+    expect(output).not.toContain('No analysis available');
+    expect(output).toContain('oasis report test-run-001');
+  });
+});